view scripts/io/textscan.m @ 17338:1c89599167a6

maint: End m-files with 1 blank line. Simplifies automated grammarchecking script. * scripts/@ftp/ascii.m, scripts/@ftp/binary.m, scripts/@ftp/cd.m, scripts/@ftp/close.m, scripts/@ftp/delete.m, scripts/@ftp/dir.m, scripts/@ftp/display.m, scripts/@ftp/ftp.m, scripts/@ftp/loadobj.m, scripts/@ftp/mget.m, scripts/@ftp/mkdir.m, scripts/@ftp/mput.m, scripts/@ftp/rename.m, scripts/@ftp/rmdir.m, scripts/@ftp/saveobj.m, scripts/audio/lin2mu.m, scripts/audio/loadaudio.m, scripts/audio/mu2lin.m, scripts/audio/record.m, scripts/audio/saveaudio.m, scripts/audio/setaudio.m, scripts/deprecated/__error_text__.m, scripts/deprecated/cut.m, scripts/deprecated/error_text.m, scripts/deprecated/isstr.m, scripts/deprecated/polyderiv.m, scripts/deprecated/studentize.m, scripts/deprecated/sylvester_matrix.m, scripts/general/bicubic.m, scripts/general/celldisp.m, scripts/general/colon.m, scripts/general/cplxpair.m, scripts/general/del2.m, scripts/general/display.m, scripts/general/isdir.m, scripts/general/isequaln.m, scripts/general/loadobj.m, scripts/general/private/__isequal__.m, scripts/general/private/__splinen__.m, scripts/general/profexplore.m, scripts/general/quadgk.m, scripts/general/randi.m, scripts/general/repmat.m, scripts/general/saveobj.m, scripts/geometry/delaunay.m, scripts/help/__unimplemented__.m, scripts/help/doc_cache_create.m, scripts/help/get_first_help_sentence.m, scripts/help/help.m, scripts/help/print_usage.m, scripts/help/private/__additional_help_message__.m, scripts/help/private/__strip_html_tags__.m, scripts/help/type.m, scripts/image/imfinfo.m, scripts/image/imformats.m, scripts/image/imread.m, scripts/image/imwrite.m, scripts/image/private/__imfinfo__.m, scripts/image/private/__imread__.m, scripts/image/private/__imwrite__.m, scripts/image/private/imageIO.m, scripts/image/private/imwrite_filename.m, scripts/image/private/ind2x.m, scripts/io/beep.m, scripts/io/strread.m, scripts/io/textread.m, scripts/io/textscan.m, scripts/linear-algebra/krylov.m, 
scripts/linear-algebra/subspace.m, scripts/miscellaneous/bug_report.m, scripts/miscellaneous/bunzip2.m, scripts/miscellaneous/cast.m, scripts/miscellaneous/copyfile.m, scripts/miscellaneous/debug.m, scripts/miscellaneous/dir.m, scripts/miscellaneous/dump_prefs.m, scripts/miscellaneous/error_ids.m, scripts/miscellaneous/fileattrib.m, scripts/miscellaneous/gunzip.m, scripts/miscellaneous/isdeployed.m, scripts/miscellaneous/ismac.m, scripts/miscellaneous/mex.m, scripts/miscellaneous/mexext.m, scripts/miscellaneous/mkoctfile.m, scripts/miscellaneous/movefile.m, scripts/miscellaneous/namelengthmax.m, scripts/miscellaneous/news.m, scripts/miscellaneous/pack.m, scripts/miscellaneous/perl.m, scripts/miscellaneous/private/display_info_file.m, scripts/miscellaneous/python.m, scripts/miscellaneous/rmappdata.m, scripts/miscellaneous/run.m, scripts/miscellaneous/tar.m, scripts/miscellaneous/tempname.m, scripts/miscellaneous/untar.m, scripts/miscellaneous/unzip.m, scripts/miscellaneous/what.m, scripts/miscellaneous/zip.m, scripts/optimization/fminunc.m, scripts/optimization/fsolve.m, scripts/optimization/fzero.m, scripts/optimization/glpk.m, scripts/optimization/optimget.m, scripts/optimization/optimset.m, scripts/optimization/qp.m, scripts/optimization/sqp.m, scripts/path/pathdef.m, scripts/pkg/pkg.m, scripts/pkg/private/build.m, scripts/pkg/private/describe.m, scripts/pkg/private/dirempty.m, scripts/pkg/private/get_forge_download.m, scripts/pkg/private/get_forge_pkg.m, scripts/pkg/private/get_unsatisfied_deps.m, scripts/pkg/private/install.m, scripts/pkg/private/is_architecture_dependent.m, scripts/pkg/private/list_forge_packages.m, scripts/pkg/private/rebuild.m, scripts/pkg/private/shell.m, scripts/pkg/private/uninstall.m, scripts/plot/axes.m, scripts/plot/box.m, scripts/plot/closereq.m, scripts/plot/diffuse.m, scripts/plot/ezpolar.m, scripts/plot/findfigs.m, scripts/plot/gco.m, scripts/plot/guidata.m, scripts/plot/guihandles.m, scripts/plot/hdl2struct.m, 
scripts/plot/linkprop.m, scripts/plot/peaks.m, scripts/plot/print.m, scripts/plot/private/__add_datasource__.m, scripts/plot/private/__axis_label__.m, scripts/plot/private/__clabel__.m, scripts/plot/private/__color_str_rgb__.m, scripts/plot/private/__contour__.m, scripts/plot/private/__default_plot_options__.m, scripts/plot/private/__errcomm__.m, scripts/plot/private/__file_filter__.m, scripts/plot/private/__fltk_file_filter__.m, scripts/plot/private/__getlegenddata__.m, scripts/plot/private/__gnuplot_open_stream__.m, scripts/plot/private/__gnuplot_print__.m, scripts/plot/private/__go_draw_axes__.m, scripts/plot/private/__interp_cube__.m, scripts/plot/private/__is_function__.m, scripts/plot/private/__line__.m, scripts/plot/private/__marching_cube__.m, scripts/plot/private/__next_line_style__.m, scripts/plot/private/__patch__.m, scripts/plot/private/__pie__.m, scripts/plot/private/__pltopt__.m, scripts/plot/private/__quiver__.m, scripts/plot/private/__scatter__.m, scripts/plot/private/__stem__.m, scripts/plot/private/__uigetdir_fltk__.m, scripts/plot/private/__uigetfile_fltk__.m, scripts/plot/private/__uiobject_split_args__.m, scripts/plot/private/__uiputfile_fltk__.m, scripts/plot/refresh.m, scripts/plot/saveas.m, scripts/plot/shg.m, scripts/plot/specular.m, scripts/plot/sphere.m, scripts/plot/struct2hdl.m, scripts/plot/subplot.m, scripts/plot/uicontextmenu.m, scripts/plot/uicontrol.m, scripts/plot/uipanel.m, scripts/plot/uipushtool.m, scripts/plot/uiresume.m, scripts/plot/uitoggletool.m, scripts/plot/uitoolbar.m, scripts/plot/uiwait.m, scripts/plot/waitforbuttonpress.m, scripts/polynomial/pchip.m, scripts/polynomial/polyeig.m, scripts/polynomial/ppval.m, scripts/prefs/addpref.m, scripts/prefs/getpref.m, scripts/prefs/ispref.m, scripts/prefs/private/loadprefs.m, scripts/prefs/private/prefsfile.m, scripts/prefs/private/saveprefs.m, scripts/prefs/setpref.m, scripts/set/private/validargs.m, scripts/set/unique.m, scripts/signal/arch_fit.m, scripts/signal/arch_rnd.m, 
scripts/signal/arch_test.m, scripts/signal/arma_rnd.m, scripts/signal/durbinlevinson.m, scripts/signal/fractdiff.m, scripts/signal/freqz.m, scripts/signal/freqz_plot.m, scripts/signal/hurst.m, scripts/signal/periodogram.m, scripts/signal/private/rectangle_lw.m, scripts/signal/private/rectangle_sw.m, scripts/signal/private/triangle_sw.m, scripts/signal/spectral_adf.m, scripts/signal/spectral_xdf.m, scripts/signal/stft.m, scripts/signal/synthesis.m, scripts/signal/yulewalker.m, scripts/sparse/colperm.m, scripts/sparse/eigs.m, scripts/sparse/etreeplot.m, scripts/sparse/gmres.m, scripts/sparse/private/__sprand_impl__.m, scripts/sparse/spdiags.m, scripts/sparse/sprandn.m, scripts/specfun/bessel.m, scripts/specfun/betaln.m, scripts/specfun/expint.m, scripts/special-matrix/gallery.m, scripts/startup/__finish__.m, scripts/statistics/base/qqplot.m, scripts/statistics/distributions/tcdf.m, scripts/statistics/distributions/wienrnd.m, scripts/statistics/models/logistic_regression.m, scripts/statistics/models/private/logistic_regression_derivatives.m, scripts/statistics/models/private/logistic_regression_likelihood.m, scripts/statistics/tests/anova.m, scripts/statistics/tests/bartlett_test.m, scripts/statistics/tests/chisquare_test_homogeneity.m, scripts/statistics/tests/chisquare_test_independence.m, scripts/statistics/tests/cor_test.m, scripts/statistics/tests/f_test_regression.m, scripts/statistics/tests/hotelling_test.m, scripts/statistics/tests/hotelling_test_2.m, scripts/statistics/tests/kolmogorov_smirnov_test_2.m, scripts/statistics/tests/kruskal_wallis_test.m, scripts/statistics/tests/manova.m, scripts/statistics/tests/mcnemar_test.m, scripts/statistics/tests/prop_test_2.m, scripts/statistics/tests/run_test.m, scripts/statistics/tests/sign_test.m, scripts/statistics/tests/t_test.m, scripts/statistics/tests/t_test_2.m, scripts/statistics/tests/t_test_regression.m, scripts/statistics/tests/u_test.m, scripts/statistics/tests/var_test.m, 
scripts/statistics/tests/welch_test.m, scripts/statistics/tests/wilcoxon_test.m, scripts/statistics/tests/z_test.m, scripts/statistics/tests/z_test_2.m, scripts/strings/strcat.m, scripts/strings/strjoin.m, scripts/strings/strsplit.m, scripts/testfun/__have_feature__.m, scripts/testfun/__printf_assert__.m, scripts/testfun/__prog_output_assert__.m, scripts/testfun/__run_test_suite__.m, scripts/time/clock.m, scripts/time/datenum.m, scripts/ui/errordlg.m, scripts/ui/private/message_dialog.m: End m-files with 1 blank line.
author Rik <rik@octave.org>
date Wed, 28 Aug 2013 08:33:02 -0700
parents b81b9d079515
children 2ce1f1966fc3
line wrap: on
line source

## Copyright (C) 2010-2013 Ben Abbott
##
## This file is part of Octave.
##
## Octave is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or (at
## your option) any later version.
##
## Octave is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Octave; see the file COPYING.  If not, see
## <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn  {Function File} {@var{C} =} textscan (@var{fid}, @var{format})
## @deftypefnx {Function File} {@var{C} =} textscan (@var{fid}, @var{format}, @var{n})
## @deftypefnx {Function File} {@var{C} =} textscan (@var{fid}, @var{format}, @var{param}, @var{value}, @dots{})
## @deftypefnx {Function File} {@var{C} =} textscan (@var{fid}, @var{format}, @var{n}, @var{param}, @var{value}, @dots{})
## @deftypefnx {Function File} {@var{C} =} textscan (@var{str}, @dots{})
## @deftypefnx {Function File} {[@var{C}, @var{position}] =} textscan (@var{fid}, @dots{})
## Read data from a text file or string.
##
## The string @var{str} or file associated with @var{fid} is read from and
## parsed according to @var{format}.  The function behaves like @code{strread}
## except it can also read from file instead of a string.  See the documentation
## of @code{strread} for details.
##
## In addition to the options supported by @code{strread}, this function
## supports a few more:
##
## @itemize
## @item @qcode{"collectoutput"}:
## A value of 1 or true instructs textscan to concatenate consecutive columns
## of the same class in the output cell array.  A value of 0 or false (default)
## leaves output in distinct columns.
##
## @item @qcode{"endofline"}:
## Specify @qcode{"\r"}, @qcode{"\n"} or @qcode{"\r\n"} (for CR, LF, or
## CRLF).  If no value is given, it will be inferred from the file.  If set
## to "" (empty string) EOLs are ignored as delimiters and added to
## whitespace.
##
## @item @qcode{"headerlines"}:
## The first @var{value} number of lines of @var{fid} are skipped.
##
## @item @qcode{"returnonerror"}:
## If set to numerical 1 or true (default), return normally when read errors
## have been encountered.  If set to 0 or false, return an error and no data.
## As the string or file is read by columns rather than by rows, and because
## textscan is fairly forgiving as regards read errors, setting this option
## may have little or no actual effect.
## @end itemize
##
## When reading from a character string, optional input argument @var{n}
## specifies the number of times @var{format} should be used (i.e., to limit
## the amount of data read).
## When reading from file, @var{n} specifies the number of data lines to read;
## in this sense it differs slightly from the format repeat count in strread.
##
## The output @var{C} is a cell array whose second dimension is determined
## by the number of format specifiers.
##
## The second output, @var{position}, provides the position, in characters,
## from the beginning of the file.
##
## If the format string is empty (not: omitted) and the file contains only
## numeric data (excluding headerlines), textscan will return data in a number
## of columns matching the number of numeric fields on the first data line of
## the file.
##
## @seealso{dlmread, fscanf, load, strread, textread}
## @end deftypefn

## Author: Ben Abbott <bpabbott@mac.com>

function [C, position] = textscan (fid, format = "%f", varargin)

  ## Implementation outline:
  ##   1. validate the arguments and fill in the option defaults that differ
  ##      between textscan and strread;
  ##   2. obtain the text to parse (the input string itself, or the requested
  ##      part of the file);
  ##   3. let strread do the actual parsing;
  ##   4. post-process the columns for empty formats and CollectOutput.

  BUFLENGTH = 4096;               # Read buffer
  emptfmt = 0;                    # Signals deliberately empty format string

  ## Check input
  if (nargin < 1)
    print_usage ();
  endif

  if (isempty (format))
    emptfmt = 1;
    format = "%f";
  endif

  if (! ischar (format))
    error ("textscan: FORMAT must be a string");
  endif

  ## Determine the number of data fields & initialize output array
  num_fields = numel (strfind (format, "%")) - numel (strfind (format, "%*"));
  C = cell (1, num_fields);

  ## A file id must be a positive scalar.  Testing isscalar first keeps an
  ## array-valued comparison out of the short-circuit expression.
  if (! (isa (fid, "double") && isscalar (fid) && fid > 0) && ! ischar (fid))
    error ("textscan: first argument must be a file id or character string");
  endif

  args = varargin;
  if (nargin > 2 && isnumeric (args{1}))
    nlines = args{1};
  else
    nlines = Inf;
  endif
  if (nlines < 1)
    printf ("textscan: N = 0, no data read\n");
    ## Nothing is read, but a requested file position must still be defined
    if (nargout == 2 && ! ischar (fid))
      position = ftell (fid);
    endif
    return;
  endif

  if (! any (strcmpi (args, "emptyvalue")))
    ## Matlab returns NaNs for missing values
    args(end+1:end+2) = {'emptyvalue', NaN};
  endif

  ## Check default parameter values that differ for strread & textread

  ipos = find (strcmpi (args, "whitespace"));
  if (isempty (ipos))
    ## Matlab default whitespace = " \b\t"
    args(end+1:end+2) = {'whitespace', " \b\t"};
    whitespace = " \b\t";
  else
    ## Check if there's at least one string format specifier
    has_str_fmt = ! isempty (regexp (format, '%[*]?\d*s', "once"));
    ## If there is a string format AND whitespace value = empty,
    ## don't add a space (char(32)) to whitespace
    if (! (isempty (args{ipos+1}) && has_str_fmt))
      args{ipos+1} = unique ([" ", args{ipos+1}]);
    endif
    ## Remember the effective whitespace; it is needed further below to
    ## count the columns in case of an empty format string
    whitespace = args{ipos+1};
  endif

  if (! any (strcmpi (args, "delimiter")))
    ## Matlab says default delimiter = whitespace.
    ## strread() will pick this up further
    args(end+1:end+2) = {'delimiter', ""};
    delimiter = "";
  else
    delimiter = args{find (strcmpi (args, "delimiter")) + 1};
  endif

  collop = false;
  ipos = find (strcmpi (args, "collectoutput"));
  if (! isempty (ipos))
    ## Search & concatenate consecutive columns of same class requested
    if (isscalar (args{ipos+1})
        && (islogical (args{ipos+1}) || isnumeric (args{ipos+1})))
      collop = args{ipos+1};
    else
      warning ("textscan: illegal value for CollectOutput parameter - ignored");
    endif
    ## Remove argument before call to strread() below
    args(ipos:ipos+1) = [];
  endif

  if (any (strcmpi (args, "returnonerror")))
    ## Because of the way strread() reads data (columnwise) this parameter
    ## can't be neatly implemented.  strread() will pick it up anyway
    warning ('textscan: ReturnOnError is not fully implemented');
  else
    ## Set default value (=true)
    args(end+1:end+2) = {"returnonerror", 1};
  endif

  ## Check if a headerlines argument is specified
  headerlines = find (strcmpi (args, "headerlines"), 1);
  if (! isempty (headerlines))
    ## Yep. But it is stray when reading from strings...
    if (ischar (fid))
      warning ("textscan: 'headerlines' ignored when reading from strings");
    endif
  endif

  if (ischar (fid))
    ## Read from a text string
    if (nargout == 2)
      error ("textscan: cannot provide position information for character input");
    endif
    str = fid;
  else
    st_pos = ftell (fid);
    ## Skip header lines if requested
    if (! isempty (headerlines))
      ## Beware of missing or wrong headerline value
      if (headerlines  == numel (args)
         || ! isnumeric (args{headerlines + 1}))
        error ("Missing or illegal value for 'headerlines'" );
      endif
      ## Avoid conveying floats to fskipl
      args{headerlines + 1} = round (args{headerlines + 1});
      if (args{headerlines + 1} > 0)
        ## Beware of zero valued headerline, fskipl would skip to EOF
        fskipl (fid, args{headerlines + 1});
        args(headerlines:headerlines+1) = [];
        st_pos = ftell (fid);
      elseif (args{headerlines + 1} < 0)
        warning ("textscan.m: negative headerline value ignored");
      endif
    endif
    ## Read a first file chunk. Rest follows after endofline processing
    [str, count] = fscanf (fid, "%c", BUFLENGTH);

  endif

  ## Check for empty result
  if (isempty (str))
    warning ("textscan: no data read");
    if (nargout == 2 && ! ischar (fid))
      position = ftell (fid);
    endif
    return;
  endif

  ## Check value of 'endofline'.  String or file doesn't seem to matter
  endofline = find (strcmpi (args, "endofline"), 1);
  if (! isempty (endofline))
    if (ischar (args{endofline + 1}))
      eol_char = args{endofline + 1};
      if (! any (strcmp (eol_char, {"", "\n", "\r", "\r\n"})))
        error ("textscan: illegal EndOfLine character value specified");
      endif
    else
      error ("textscan: character value required for EndOfLine");
    endif
  else
    if (! ischar (fid))
      ## Determine EOL from file.
      ## Search for EOL candidates in first BUFLENGTH chars
      eol_srch_len = min (length (str), BUFLENGTH);
      ## First try DOS (CRLF)
      if (! isempty (strfind (str(1 : eol_srch_len), "\r\n")))
        eol_char = "\r\n";
      ## Perhaps old Macintosh? (CR)
      elseif (! isempty (strfind (str(1 : eol_srch_len), "\r")))
        eol_char = "\r";
      ## Otherwise, use plain UNIX (LF)
      else
        eol_char = "\n";
      endif
    else
      eol_char = "\n";
    endif
    ## Set up the default endofline param value
    args(end+1:end+2) = {"endofline", eol_char};
  endif
  l_eol_char = length (eol_char);

  if (! ischar (fid))
    ## Now that we know what EOL looks like, we can process format_repeat_count.
    ## FIXME The below isn't ML-compatible: counts lines, not format string uses
    if (isfinite (nlines) && (nlines >= 0))
      ## An empty EOL can never be found; do not hand an empty pattern to
      ## strfind.  strfind (unlike findstr) never swaps its arguments, so a
      ## buffer shorter than the EOL cannot yield a bogus match.
      have_eol = (l_eol_char > 0);
      eoi = [];
      if (have_eol)
        eoi = strfind (str, eol_char);
      endif
      n_eoi = length (eoi);       # Nr. of EOLs found so far (all buffers)
      nread = count;              # Nr. of chars read so far, from st_pos
      str_offset = 0;             # Nr. of chars in the file preceding str(1)
      ## Avoid slow repeated str concatenation, first seek requested end of data
      while (n_eoi < nlines && count == BUFLENGTH)
        [nstr, count] = fscanf (fid, "%c", BUFLENGTH);
        if (count > 0)
          ## Watch out for multichar EOL being missed across buffer boundaries:
          ## prepend the tail of the previous buffer to the new one
          if (l_eol_char > 1)
            carry = str(end - l_eol_char + 2 : end);
          else
            carry = "";
          endif
          ## The carried-over chars shift all indices into str; account for
          ## that when translating them back to file offsets
          str_offset = nread - length (carry);
          str = [carry nstr];
          nread += count;
          eoi = [];
          if (have_eol)
            eoi = strfind (str, eol_char);
          endif
          n_eoi += numel (eoi);
        endif
      endwhile
      if (n_eoi < nlines)
        ## File exhausted before the requested nr. of lines was found (this
        ## includes a last line lacking a trailing EOL): read all there is
        if (n_eoi == 0)
          printf ("textscan: format repeat count specified but no endofline found\n");
        endif
        data_size = nread;
      else
        ## OK, found EOL delimiting last requested line.  It lies in the
        ## current buffer; compute data size to read incl. complete EOL
        data_size = str_offset + eoi(end - (n_eoi - nlines)) + l_eol_char - 1;
      endif
      fseek (fid, st_pos, "bof");
      str = fscanf (fid, "%c", data_size);
    else
      fseek (fid, st_pos, "bof");
      str = fread (fid, "char=>char").';
    endif
  endif

  ## Strip trailing EOL to avoid returning stray missing values (f. strread).
  ## However, in case of CollectOutput request, presence of EOL is required;
  ## also in case of deliberately entered empty format string.
  ## Data shorter than the EOL sequence cannot end in an EOL (and must not be
  ## indexed before its first character).
  eol_at_end = (length (str) >= l_eol_char
                && strcmp (str(end - l_eol_char + 1 : end), eol_char));
  if (collop || emptfmt)
    if (! eol_at_end)
      str(end+1 : end+l_eol_char) = eol_char;
    endif
  elseif (eol_at_end)
    str(end - l_eol_char + 1 : end) = "";
    ## A corner case: str may now be empty....
    if (isempty (str))
      if (nargout == 2 && ! ischar (fid))
        position = ftell (fid);
      endif
      return;
    endif
  endif

  ## Call strread to make it do the real work
  C = cell (1, num_fields);
  [C{:}] = strread (str, format, args{:});

  ## I.c.o. empty format, match nr. of cols to nr. of fields on first read line
  if (emptfmt)
    ## Find end of first line
    eoi = index (str, eol_char);
    if (eoi)
      ## str contains an EOL, proceed with assessing nr. of columns
      ncols = countcols (C, str(1 : eoi-1), delimiter, whitespace);
      ## See if lowermost data row must be completed
      pad = mod (numel (C{1}), ncols);
      if (pad)
        ## Textscan returns NaNs for empty fields
        C{1} = [C{1}; NaN(ncols - pad, 1)];
      endif
      ## Replace NaNs with EmptyValue, if any
      ipos = find (strcmpi (args, "emptyvalue"));
      if (ipos)
        C{1}(isnan (C{1})) = args{ipos+1};
      endif
      ## strread delivered all values in one column, row after row.
      ## Reshape into rows x ncols; watch out, transpose needed
      C{1} = reshape (C{1}, ncols, numel (C{1}) / ncols)';
      ## Distribute columns over C and wipe cols 2:end of C{1}
      for ii = 2:ncols
        C{ii} = C{1}(:, ii);
      endfor
      C{1} = C{1}(:, 1);
    endif
  endif

  ## If requested, collect output columns of same class
  if (collop)
    C = colloutp (C);
  endif

  if (nargout == 2)
    ## Report file position after reading (only reachable for file input)
    position = ftell (fid);
  endif

endfunction


## Assess nr of data fields on first line of data
##
##   C   - columns returned by strread (not used here; kept so that the
##         call signature stays the same)
##   str - first data line, without its end-of-line sequence
##   dlm - delimiter characters; empty means "fields separated by whitespace"
##   wsp - whitespace characters
##
## Returns the number of fields on the line (always at least 1).
## NOTE: dlm and wsp are inserted verbatim into a regexp bracket expression,
## so characters that are special there (e.g. "]" or "^") are not supported.
function ncols = countcols (C, str, dlm, wsp)

  if (isempty (dlm))
    ## Field separator = whitespace.  Fold each run of whitespace characters
    ## into ONE blank, otherwise e.g. "1   2" would be counted as 4 fields
    str = regexprep (str, sprintf ("[%s]+", wsp), " ");
    ## Leading/trailing whitespace doesn't separate anything
    str = strtrim (str);
    ncols = numel (strfind (str, " ")) + 1;
  else
    ## Every single delimiter character separates two (possibly empty) fields
    ncols = numel (regexp (str, sprintf ("[%s]", dlm))) + 1;
  endif

endfunction


## Merge each run of neighbouring columns sharing one class into a single
## cell element (columns are concatenated side by side); columns without a
## same-class neighbour are left untouched.
function C = colloutp (C)

  ncol = numel (C);
  if (ncol < 2)
    ## Nothing to merge
    return;
  endif

  ## Class name of every column, as a row
  klass = cellfun (@class, C(:).', "uniformoutput", false);

  ## A column opens a new run when its class differs from its left neighbour
  opens_run = [true, ! strcmp(klass(1:end-1), klass(2:end))];
  first = find (opens_run);
  last = [first(2:end) - 1, ncol];

  ## Process the runs from right to left so that deleting merged columns
  ## doesn't invalidate the indices of runs still to be handled
  for k = numel (first):-1:1
    lo = first(k);
    hi = last(k);
    if (hi > lo)
      C{lo} = [C{lo : hi}];
      C(lo+1 : hi) = [];
    endif
  endfor

endfunction


%!test
%! str = "1,  2,  3,  4\n 5,  ,  ,  8\n 9, 10, 11, 12";
%! fmtstr = "%f %d %f %s";
%! c = textscan (str, fmtstr, 2, "delimiter", ",", "emptyvalue", -Inf);
%! assert (isequal (c{1}, [1;5]));
%! assert (length (c{1}), 2);
%! assert (iscellstr (c{4}));
%! assert (isequal (c{3}, [3; -Inf]));

%!test
%! b = [10:10:100];
%! b = [b; 8*b/5];
%! str = sprintf ("%g miles/hr = %g kilometers/hr\n", b);
%! fmt = "%f miles/hr = %f kilometers/hr";
%! c = textscan (str, fmt);
%! assert (b(1,:)', c{1}, 1e-5);
%! assert (b(2,:)', c{2}, 1e-5);

%!test
%! str = "13, 72, NA, str1, 25\r\n// Middle line\r\n36, na, 05, str3, 6";
%! a = textscan (str, "%d %n %f %s %n", "delimiter", ",","treatAsEmpty", {"NA", "na"},"commentStyle", "//");
%! assert (a{1}, int32 ([13; 36]));
%! assert (a{2}, [72; NaN]);
%! assert (a{3}, [NaN; 5]);
%! assert (a{4}, {"str1"; "str3"});
%! assert (a{5}, [25; 6]);

%!test
%! str = "Km:10 = hhhBjjj miles16hour\r\n";
%! str = [str "Km:15 = hhhJjjj miles241hour\r\n"];
%! str = [str "Km:2 = hhhRjjj miles3hour\r\n"];
%! str = [str "Km:25 = hhhZ\r\n"];
%! fmt = "Km:%d = hhh%1sjjj miles%dhour";
%! a = textscan (str, fmt, "delimiter", " ");
%! assert (a{1}', int32 ([10 15 2 25]));
%! assert (a{2}', {'B' 'J' 'R' 'Z'});
%! assert (a{3}', int32 ([16 241 3 0]));

%% Test with default endofline parameter
%!test
%! c = textscan ("L1\nL2", "%s");
%! assert (c{:}, {"L1"; "L2"});

%% Test with endofline parameter set to "" (empty) - newline should be in word
%!test
%! c = textscan ("L1\nL2", "%s", "endofline", "");
%! assert (int8 (c{:}{:}), int8 ([ 76,  49,  10,  76,  50 ]));

%!test
%! ## No delimiters at all besides EOL.  Skip fields, even empty fields
%! str = "Text1Text2Text\nTextText4Text\nText57Text";
%! c = textscan (str, "Text%*dText%dText");
%! assert (c{1}, int32 ([2; 4; 0]));

%!test
%% CollectOutput test
%! b = [10:10:100];
%! b = [b; 8*b/5; 8*b*1000/5];
%! str = sprintf ("%g miles/hr = %g (%g) kilometers (meters)/hr\n", b);
%! fmt = "%f miles%s %s %f (%f) kilometers %*s";
%! c = textscan (str, fmt, "collectoutput", 1);
%! assert (size (c{3}), [10, 2]);
%! assert (size (c{2}), [10, 2]);

%!test
%% CollectOutput test with uneven column length files
%! b = [10:10:100];
%! b = [b; 8*b/5; 8*b*1000/5];
%! str = sprintf ("%g miles/hr = %g (%g) kilometers (meters)/hr\n", b);
%! str = [str "110 miles/hr"];
%! fmt = "%f miles%s %s %f (%f) kilometers %*s";
%! c = textscan (str, fmt, "collectoutput", 1);
%! assert (size (c{1}), [11, 1]);
%! assert (size (c{3}), [11, 2]);
%! assert (size (c{2}), [11, 2]);
%! assert (c{3}(end), NaN);
%! assert (c{2}{11, 1}, "/hr");
%! assert (isempty (c{2}{11, 2}), true);

%% Test input validation
%!error textscan ()
%!error textscan (single (4))
%!error textscan ({4})
%!error <must be a string> textscan ("Hello World", 2)
%!error <cannot provide position information> [C, pos] = textscan ("Hello World")
%!error <character value required> textscan ("Hello World", '%s', 'EndOfLine', 3)

%% Test incomplete first data line
%!test
%! R = textscan (['Empty1' char(10)], 'Empty%d %f');
%! assert (R{1}, int32 (1));
%! assert (isempty (R{2}), true);

%% bug #37023 (actually a strread test)
%!test
%! data = textscan("   1. 1 \n 2 3\n", '%f %f');
%! assert (data{1}, [1; 2], 1e-15);
%! assert (data{2}, [1; 3], 1e-15);

%%  Whitespace test (bug #37333) using delimiter ";"
%!test
%! tc = [];
%! tc{1, 1} = "C:/code;";
%! tc{1, end+1} = "C:/code/meas;";
%! tc{1, end+1} = " C:/code/sim;";
%! tc{1, end+1} = "C:/code/utils;";
%! string = [tc{:}];
%! c = textscan (string, "%s", "delimiter", ";");
%! for k = 1:numel (c{1})
%!   lh = c{1}{k};
%!   rh = tc{k};
%!   rh(rh == ";") = "";
%!   rh = strtrim (rh);
%!   assert (strcmp (lh, rh));
%! end

%%  Whitespace test (bug #37333), adding multipleDelimsAsOne true arg
%!test
%! tc = [];
%! tc{1, 1} = "C:/code;";
%! tc{1, end+1} = " C:/code/meas;";
%! tc{1, end+1} = "C:/code/sim;;";
%! tc{1, end+1} = "C:/code/utils;";
%! string = [tc{:}];
%! c = textscan (string, "%s", "delimiter", ";", "multipleDelimsAsOne", 1);
%! for k = 1:numel (c{1})
%!   lh = c{1}{k};
%!   rh = tc{k};
%!   rh(rh == ";") = "";
%!   rh = strtrim (rh);
%!   assert (strcmp (lh, rh));
%! end

%%  Whitespace test (bug #37333), adding multipleDelimsAsOne false arg
%!test
%! tc = [];
%! tc{1, 1} = "C:/code;";
%! tc{1, end+1} = " C:/code/meas;";
%! tc{1, end+1} = "C:/code/sim;;";
%! tc{1, end+1} = "";
%! tc{1, end+1} = "C:/code/utils;";
%! string = [tc{:}];
%! c = textscan (string, "%s", "delimiter", ";", "multipleDelimsAsOne", 0);
%! for k = 1:numel (c{1})
%!   lh = c{1}{k};
%!   rh = tc{k};
%!   rh(rh == ";") = "";
%!   rh = strtrim (rh);
%!   assert (strcmp (lh, rh));
%! end

%%  Whitespace test (bug #37333) whitespace "" arg
%!test
%! tc = [];
%! tc{1, 1} = "C:/code;";
%! tc{1, end+1} = " C:/code/meas;";
%! tc{1, end+1} = "C:/code/sim;";
%! tc{1, end+1} = "C:/code/utils;";
%! string = [tc{:}];
%! c = textscan (string, "%s", "delimiter", ";", "whitespace", "");
%! for k = 1:numel (c{1})
%!   lh = c{1}{k};
%!   rh = tc{k};
%!   rh(rh == ";") = "";
%!   assert (strcmp (lh, rh));
%! end

%%  Whitespace test (bug #37333), whitespace " " arg
%!test
%! tc = [];
%! tc{1, 1} = "C:/code;";
%! tc{1, end+1} = " C:/code/meas;";
%! tc{1, end+1} = "C:/code/sim;";
%! tc{1, end+1} = "C:/code/utils;";
%! string = [tc{:}];
%! c = textscan (string, "%s", "delimiter", ";", "whitespace", " ");
%! for k = 1:numel (c{1})
%!   lh = c{1}{k};
%!   rh = tc{k};
%!   rh(rh == ";") = "";
%!   rh = strtrim (rh);
%!   assert (strcmp (lh, rh));
%! end

%% Test reading from a real file
%!test
%! f = tmpnam ();
%! fid = fopen (f, "w+");
%! d = rand (1, 4);
%! fprintf (fid, "  %f %f   %f  %f ", d);
%! fseek (fid, 0, "bof");
%! A = textscan (fid, "%f %f");
%! fclose (fid);
%! unlink (f);
%! assert (A{1}, [d(1); d(3)], 1e-6);
%! assert (A{2}, [d(2); d(4)], 1e-6);

%% Tests reading with empty format, should return proper nr of columns
%!test
%! f = tmpnam ();
%! fid = fopen (f, "w+");
%! fprintf (fid, " 1 2 3 4\n5 6 7 8");
%! fseek (fid, 0, "bof");
%! A = textscan (fid, "");
%! fclose (fid);
%! unlink (f);
%! assert (A{1}, [1 ; 5], 1e-6);
%! assert (A{2}, [2 ; 6], 1e-6);
%! assert (A{3}, [3 ; 7], 1e-6);
%! assert (A{4}, [4 ; 8], 1e-6);

%% Tests reading with empty format; empty fields & incomplete lower row
%!test
%! f = tmpnam ();
%! fid = fopen (f, "w+");
%! fprintf (fid, " ,2,,4\n5,6");
%! fseek (fid, 0, "bof");
%! A = textscan (fid, "", "delimiter", ",", "EmptyValue", 999, "CollectOutput" , 1);
%! fclose (fid);
%! unlink (f);
%! assert (A{1}, [999, 2, 999, 4; 5, 6, 999, 999], 1e-6);

%% Error message tests

%!test
%! f = tmpnam ();
%! fid = fopen (f, "w+");
%! msg1 = "Missing or illegal value for 'headerlines'";
%! try
%! A = textscan (fid, "", "headerlines");
%! end_try_catch;
%! fclose (fid);
%! unlink (f);
%! assert (msg1, lasterr);

%!test
%! f = tmpnam ();
%! fid = fopen (f, "w+");
%! msg1 = "Missing or illegal value for 'headerlines'";
%! try
%! A = textscan (fid, "", "headerlines", "hh");
%! end_try_catch;
%! fclose (fid);
%! unlink (f);
%! assert (msg1, lasterr);

%!test
%! f = tmpnam ();
%! fid = fopen (f, "w+");
%! fprintf (fid,"some_string");
%! fseek (fid, 0, "bof");
%! msg1 = "textscan: illegal EndOfLine character value specified";
%! try
%! A = textscan (fid, "%f", "EndOfLine", "\n\r");
%! end_try_catch;
%! fclose (fid);
%! unlink (f);
%! assert (msg1, lasterr);

%!test
%! f = tmpnam ();
%! fid = fopen (f, "w+");
%! fprintf (fid,"some_string");
%! fseek (fid, 0, "bof");
%! msg1 = "textscan: character value required for EndOfLine";
%! try
%! A = textscan (fid, "%f", "EndOfLine", 33);
%! end_try_catch;
%! fclose (fid);
%! unlink (f);
%! assert (msg1, lasterr);