view scripts/io/textread.m @ 17281:bc924baa2c4e

doc: Add new @qcode macro for code samples which are quoted. Macro handles options ("on") or properties ("position") more elegantly than @code{"text"}. * doc/interpreter/macros.texi: Add new @qcode macro. * doc/interpreter/tips.txi: Add documentation about @qcode macro. * doc/interpreter/basics.txi, doc/interpreter/container.txi, doc/interpreter/emacs.txi, doc/interpreter/errors.txi, doc/interpreter/eval.txi, doc/interpreter/expr.txi, doc/interpreter/external.txi, doc/interpreter/func.txi, doc/interpreter/grammar.txi, doc/interpreter/image.txi, doc/interpreter/install.txi, doc/interpreter/interp.txi, doc/interpreter/io.txi, doc/interpreter/matrix.txi, doc/interpreter/numbers.txi, doc/interpreter/oop.txi, doc/interpreter/package.txi, doc/interpreter/plot.txi, doc/interpreter/quad.txi, doc/interpreter/sparse.txi, doc/interpreter/strings.txi, doc/interpreter/system.txi, doc/interpreter/vectorize.txi, libinterp/corefcn/balance.cc, libinterp/corefcn/bitfcns.cc, libinterp/corefcn/cellfun.cc, libinterp/corefcn/conv2.cc, libinterp/corefcn/data.cc, libinterp/corefcn/debug.cc, libinterp/corefcn/defaults.cc, libinterp/corefcn/dirfns.cc, libinterp/corefcn/dlmread.cc, libinterp/corefcn/error.cc, libinterp/corefcn/file-io.cc, libinterp/corefcn/find.cc, libinterp/corefcn/gammainc.cc, libinterp/corefcn/graphics.cc, libinterp/corefcn/help.cc, libinterp/corefcn/hex2num.cc, libinterp/corefcn/input.cc, libinterp/corefcn/load-path.cc, libinterp/corefcn/load-save.cc, libinterp/corefcn/ls-oct-ascii.cc, libinterp/corefcn/lu.cc, libinterp/corefcn/luinc.cc, libinterp/corefcn/matrix_type.cc, libinterp/corefcn/oct-hist.cc, libinterp/corefcn/pager.cc, libinterp/corefcn/pr-output.cc, libinterp/corefcn/pt-jit.cc, libinterp/corefcn/qz.cc, libinterp/corefcn/rand.cc, libinterp/corefcn/regexp.cc, libinterp/corefcn/schur.cc, libinterp/corefcn/sighandlers.cc, libinterp/corefcn/sparse.cc, libinterp/corefcn/spparms.cc, libinterp/corefcn/str2double.cc, libinterp/corefcn/svd.cc, 
libinterp/corefcn/symtab.cc, libinterp/corefcn/syscalls.cc, libinterp/corefcn/toplev.cc, libinterp/corefcn/tril.cc, libinterp/corefcn/typecast.cc, libinterp/corefcn/utils.cc, libinterp/corefcn/variables.cc, libinterp/dldfcn/__init_fltk__.cc, libinterp/dldfcn/chol.cc, libinterp/dldfcn/colamd.cc, libinterp/dldfcn/fftw.cc, libinterp/dldfcn/qr.cc, libinterp/dldfcn/symbfact.cc, libinterp/octave-value/ov-base.cc, libinterp/octave-value/ov-fcn-handle.cc, libinterp/octave-value/ov-fcn-inline.cc, libinterp/octave-value/ov-java.cc, libinterp/octave-value/ov-range.cc, libinterp/octave-value/ov-struct.cc, libinterp/octave-value/ov-usr-fcn.cc, libinterp/parse-tree/oct-parse.in.yy, libinterp/parse-tree/pt-binop.cc, libinterp/parse-tree/pt-eval.cc, libinterp/parse-tree/pt-mat.cc, scripts/@ftp/ftp.m, scripts/deprecated/java_convert_matrix.m, scripts/deprecated/java_debug.m, scripts/deprecated/java_unsigned_conversion.m, scripts/deprecated/shell_cmd.m, scripts/general/dblquad.m, scripts/general/display.m, scripts/general/genvarname.m, scripts/general/idivide.m, scripts/general/interp1.m, scripts/general/interp2.m, scripts/general/interp3.m, scripts/general/interpn.m, scripts/general/isa.m, scripts/general/profexplore.m, scripts/general/profile.m, scripts/general/quadgk.m, scripts/general/randi.m, scripts/general/structfun.m, scripts/general/subsindex.m, scripts/general/triplequad.m, scripts/geometry/griddata.m, scripts/geometry/griddata3.m, scripts/geometry/griddatan.m, scripts/geometry/voronoi.m, scripts/help/help.m, scripts/help/lookfor.m, scripts/image/cmpermute.m, scripts/image/colormap.m, scripts/image/image.m, scripts/image/imagesc.m, scripts/image/imfinfo.m, scripts/image/imformats.m, scripts/image/imread.m, scripts/image/imshow.m, scripts/image/imwrite.m, scripts/image/ind2gray.m, scripts/image/lines.m, scripts/image/rgb2ind.m, scripts/image/spinmap.m, scripts/io/dlmwrite.m, scripts/io/strread.m, scripts/io/textread.m, scripts/io/textscan.m, scripts/java/javaclasspath.m, 
scripts/java/usejava.m, scripts/miscellaneous/bzip2.m, scripts/miscellaneous/computer.m, scripts/miscellaneous/copyfile.m, scripts/miscellaneous/debug.m, scripts/miscellaneous/dos.m, scripts/miscellaneous/edit.m, scripts/miscellaneous/gzip.m, scripts/miscellaneous/license.m, scripts/miscellaneous/mkoctfile.m, scripts/miscellaneous/movefile.m, scripts/miscellaneous/parseparams.m, scripts/miscellaneous/unix.m, scripts/optimization/fminbnd.m, scripts/optimization/fminsearch.m, scripts/optimization/fminunc.m, scripts/optimization/fsolve.m, scripts/optimization/fzero.m, scripts/optimization/glpk.m, scripts/optimization/lsqnonneg.m, scripts/optimization/optimset.m, scripts/optimization/pqpnonneg.m, scripts/pkg/pkg.m, scripts/plot/allchild.m, scripts/plot/ancestor.m, scripts/plot/area.m, scripts/plot/axis.m, scripts/plot/bar.m, scripts/plot/barh.m, scripts/plot/box.m, scripts/plot/caxis.m, scripts/plot/cla.m, scripts/plot/clabel.m, scripts/plot/clf.m, scripts/plot/close.m, scripts/plot/colorbar.m, scripts/plot/daspect.m, scripts/plot/ezmesh.m, scripts/plot/ezmeshc.m, scripts/plot/ezsurf.m, scripts/plot/ezsurfc.m, scripts/plot/findall.m, scripts/plot/findobj.m, scripts/plot/gcbo.m, scripts/plot/gcf.m, scripts/plot/gco.m, scripts/plot/grid.m, scripts/plot/guihandles.m, scripts/plot/hdl2struct.m, scripts/plot/hidden.m, scripts/plot/hold.m, scripts/plot/isonormals.m, scripts/plot/isosurface.m, scripts/plot/legend.m, scripts/plot/mesh.m, scripts/plot/meshc.m, scripts/plot/meshz.m, scripts/plot/newplot.m, scripts/plot/orient.m, scripts/plot/pareto.m, scripts/plot/patch.m, scripts/plot/pbaspect.m, scripts/plot/pcolor.m, scripts/plot/plot.m, scripts/plot/print.m, scripts/plot/private/__add_default_menu__.m, scripts/plot/quiver.m, scripts/plot/quiver3.m, scripts/plot/refreshdata.m, scripts/plot/saveas.m, scripts/plot/scatter.m, scripts/plot/scatter3.m, scripts/plot/shading.m, scripts/plot/shrinkfaces.m, scripts/plot/slice.m, scripts/plot/stem.m, scripts/plot/stem3.m, 
scripts/plot/struct2hdl.m, scripts/plot/subplot.m, scripts/plot/surf.m, scripts/plot/surfc.m, scripts/plot/surfl.m, scripts/plot/tetramesh.m, scripts/plot/uigetfile.m, scripts/plot/uimenu.m, scripts/plot/uiputfile.m, scripts/plot/waterfall.m, scripts/plot/whitebg.m, scripts/plot/xlim.m, scripts/plot/ylim.m, scripts/plot/zlim.m, scripts/polynomial/conv.m, scripts/polynomial/polyout.m, scripts/polynomial/splinefit.m, scripts/set/ismember.m, scripts/set/powerset.m, scripts/set/setdiff.m, scripts/set/union.m, scripts/set/unique.m, scripts/signal/detrend.m, scripts/signal/filter2.m, scripts/signal/freqz.m, scripts/signal/periodogram.m, scripts/signal/spectral_adf.m, scripts/signal/spectral_xdf.m, scripts/sparse/eigs.m, scripts/sparse/svds.m, scripts/specfun/legendre.m, scripts/special-matrix/gallery.m, scripts/statistics/base/mean.m, scripts/statistics/base/moment.m, scripts/statistics/tests/cor_test.m, scripts/statistics/tests/kolmogorov_smirnov_test.m, scripts/statistics/tests/kolmogorov_smirnov_test_2.m, scripts/statistics/tests/kruskal_wallis_test.m, scripts/statistics/tests/prop_test_2.m, scripts/statistics/tests/sign_test.m, scripts/statistics/tests/t_test.m, scripts/statistics/tests/t_test_2.m, scripts/statistics/tests/t_test_regression.m, scripts/statistics/tests/u_test.m, scripts/statistics/tests/var_test.m, scripts/statistics/tests/welch_test.m, scripts/statistics/tests/wilcoxon_test.m, scripts/statistics/tests/z_test.m, scripts/statistics/tests/z_test_2.m, scripts/strings/base2dec.m, scripts/strings/index.m, scripts/strings/isstrprop.m, scripts/strings/mat2str.m, scripts/strings/regexptranslate.m, scripts/strings/rindex.m, scripts/strings/str2num.m, scripts/strings/strcat.m, scripts/strings/strjust.m, scripts/strings/strmatch.m, scripts/strings/validatestring.m, scripts/testfun/demo.m, scripts/testfun/example.m, scripts/testfun/test.m, scripts/time/addtodate.m, scripts/time/asctime.m, scripts/time/datestr.m, scripts/time/datetick.m, scripts/time/weekday.m, 
scripts/ui/errordlg.m, scripts/ui/helpdlg.m, scripts/ui/inputdlg.m, scripts/ui/listdlg.m, scripts/ui/msgbox.m, scripts/ui/questdlg.m, scripts/ui/warndlg.m: Use new @qcode macro.
author Rik <rik@octave.org>
date Mon, 19 Aug 2013 20:46:38 -0700
parents 12005245b645
children 088d014a7fe2
line wrap: on
line source

## Copyright (C) 2009-2013 Eric Chassande-Mottin, CNRS (France)
##
## This file is part of Octave.
##
## Octave is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or (at
## your option) any later version.
##
## Octave is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Octave; see the file COPYING.  If not, see
## <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn  {Function File} {[@var{a}, @dots{}] =} textread (@var{filename})
## @deftypefnx {Function File} {[@var{a}, @dots{}] =} textread (@var{filename}, @var{format})
## @deftypefnx {Function File} {[@var{a}, @dots{}] =} textread (@var{filename}, @var{format}, @var{n})
## @deftypefnx {Function File} {[@var{a}, @dots{}] =} textread (@var{filename}, @var{format}, @var{prop1}, @var{value1}, @dots{})
## @deftypefnx {Function File} {[@var{a}, @dots{}] =} textread (@var{filename}, @var{format}, @var{n}, @var{prop1}, @var{value1}, @dots{})
## Read data from a text file.
##
## The file @var{filename} is read and parsed according to @var{format}.  The
## function behaves like @code{strread} except it works by parsing a file
## instead of a string.  See the documentation of @code{strread} for details.
##
## In addition to the options supported by @code{strread}, this function
## supports two more:
##
## @itemize
## @item @qcode{"headerlines"}:
## The first @var{value} number of lines of @var{filename} are skipped.
##
## @item @qcode{"endofline"}:
## Specify a single character or @qcode{"\r\n"}.  If no value is given, it
## will be inferred from the file.  If set to @qcode{""} (empty string), EOLs
## are ignored as delimiters.
## @end itemize
##
## The optional input @var{n} specifies the number of data lines to read; in
## this sense it differs slightly from the format repeat count in
## @code{strread}.
##
## If the format string is empty (as opposed to omitted) and the file contains
## only numeric data (excluding headerlines), @code{textread} will return a
## rectangular matrix with the number of columns matching the number of numeric
## fields on the first data line of the file.  Empty fields are returned as
## zero values.
##
## @seealso{strread, load, dlmread, fscanf, textscan}
## @end deftypefn

function varargout = textread (filename, format = "%f", varargin)

  ## Read FILENAME (after skipping any requested header lines), cut the text
  ## down to the first N lines when a numeric N is given as first optional
  ## argument, and hand the resulting string to strread together with the
  ## remaining options.  An empty FORMAT additionally reshapes the first
  ## output into a 2-D matrix whose column count is taken from the first data
  ## line.

  BUFLENGTH = 4096;       # Chunk size (chars) used while searching for EOLs

  ## Check input
  if (nargin < 1)
    print_usage ();
  endif

  if (! ischar (filename) || ! ischar (format))
    error ("textread: FILENAME and FORMAT arguments must be strings");
  endif

  ## A numeric first optional argument is the number of lines to read
  if (! isempty (varargin) && isnumeric (varargin{1}))
    nlines = varargin{1};
  else
    nlines = Inf;
  endif
  if (nlines < 1)
    printf ("textread: N = 0, no data read\n");
    varargout = cell (1, nargout);
    return
  endif

  ## Read file
  fid = fopen (filename, "r");
  if (fid == -1)
    error ("textread: could not open '%s' for reading", filename);
  endif

  ## Skip header lines if requested
  headerlines = find (strcmpi (varargin, "headerlines"), 1);
  if (! isempty (headerlines))
    ## Beware of missing or wrong headerline value
    if (headerlines == numel (varargin)
        || ! isnumeric (varargin{headerlines + 1}))
      ## Do not leak the file handle on the error path
      fclose (fid);
      error ("missing or illegal value for 'headerlines'" );
    endif
    ## Avoid conveying floats to fskipl
    n_hdr = round (varargin{headerlines + 1});
    ## Beware of zero valued headerline, fskipl would skip to EOF
    if (n_hdr > 0)
      fskipl (fid, n_hdr);
    elseif (n_hdr < 0)
      warning ("textread: negative headerline value ignored");
    endif
    ## The option has been consumed here whatever its value was, so it must
    ## never be forwarded to strread.
    varargin(headerlines:headerlines+1) = [];
  endif
  st_pos = ftell (fid);

  ## Read a first file chunk.  Rest follows after endofline processing
  [str, count] = fscanf (fid, "%c", BUFLENGTH);
  if (isempty (str) || count < 1)
    fclose (fid);
    warning ("textread: empty file");
    varargout = cell (1, nargout);
    return;
  endif

  endofline = find (strcmpi (varargin, "endofline"), 1);
  if (! isempty (endofline))
    ## 'endofline' option set by user.
    if (endofline == numel (varargin) || ! ischar (varargin{endofline + 1}))
      fclose (fid);
      error ("character value required for EndOfLine");
    endif
    ## The code below needs the EOL sequence no matter where it came from
    eol_char = varargin{endofline + 1};
  else
    ## Determine EOL from file.  STR holds at most BUFLENGTH chars here, so
    ## the search is limited to the first chunk.
    ## First try DOS (CRLF)
    if (! isempty (strfind (str, "\r\n")))
      eol_char = "\r\n";
    ## Perhaps old Macintosh? (CR)
    elseif (! isempty (strfind (str, "\r")))
      eol_char = "\r";
    ## Otherwise, use plain *nix (LF)
    else
      eol_char = "\n";
    endif
    ## Set up default endofline param value
    varargin(end+1:end+2) = {"endofline", eol_char};
  endif

  ## Now that we know what EOL looks like, we can process format_repeat_count.
  ## FIXME The below isn't ML-compatible: counts lines, not format string uses
  if (isfinite (nlines) && (nlines > 0))
    l_eol_char = length (eol_char);
    eoi = strfind (str, eol_char);  # EOL start positions in current chunk
    n_eoi = length (eoi);           # EOLs found so far in all chunks
    nblks = 0;                      # Index (0-based) of the chunk held in STR
    nread = count;                  # Total number of chars read so far
    overlap = 0;                    # Chars of previous chunk prepended to STR
    ## Avoid slow repeated str concatenation, first seek requested end of data
    while (n_eoi < nlines && count == BUFLENGTH)
      [nstr, count] = fscanf (fid, "%c", BUFLENGTH);
      if (count > 0)
        nread += count;
        ## Watch out for multichar EOL being missed across buffer boundaries:
        ## keep the last (l_eol_char - 1) chars of the previous chunk.
        if (l_eol_char > 1)
          overlap = l_eol_char - 1;
          str = [str(end - overlap + 1 : end) nstr];
        else
          str = nstr;
        endif
        eoi = strfind (str, eol_char);
        n_eoi += numel (eoi);
        ++nblks;
      endif
    endwhile
    if (n_eoi < nlines)
      ## The file holds fewer EOLs than requested lines: take everything that
      ## was read so that a final, unterminated line is not lost.
      if (n_eoi == 0)
        printf ("textread: format repeat count specified but no endofline found\n");
      endif
      eoi_pos = nread;
    else
      ## Found EOL delimiting last requested line.  Compute the file offset of
      ## the last char of that EOL, correcting for the chars carried over
      ## from the previous chunk.
      last_eol = eoi(numel (eoi) - (n_eoi - nlines));
      eoi_pos = (nblks * BUFLENGTH) + last_eol - overlap + l_eol_char - 1;
    endif
    fseek (fid, st_pos, "bof");
    str = fscanf (fid, "%c", eoi_pos);
  else
    fseek (fid, st_pos, "bof");
    str = fread (fid, "char=>char").';
  endif
  fclose (fid);

  ## Set up default whitespace param value if needed
  if (! any (strcmpi ("whitespace", varargin)))
    varargin(end+1:end+2) = {"whitespace", " \b\t"};
  endif

  ## Call strread to make it do the real work
  [varargout{1:max (nargout, 1)}] = strread (str, format, varargin {:});

  ## Hack to concatenate/reshape numeric output into 2D array (undocumented ML)
  ## In ML this only works in case of an empty format string
  if (isempty (format))
    ## Get number of fields per line.
    ## 1. Get eol_char position (plain text search, EOL is not a pattern)
    iwhsp = find (strcmpi ("whitespace", varargin), 1);
    whsp = varargin{iwhsp + 1};
    idx = strfind (str, eol_char);
    ## 2. Get first data line til EOL.  Avoid corner case of just one line
    if (! isempty (idx))
      str = str(1:idx(1)-1);
    endif
    idelimiter = find (strcmpi (varargin, "delimiter"), 1);
    if (isempty (idelimiter))
      ## Assume delimiter = whitespace
      ## 3A. whitespace incl. consecutive whitespace => single space
      str = regexprep (str, sprintf ("[%s]+", whsp), ' ');
      ## 4A. Remove possible leading & trailing spaces
      str = strtrim (str);
      ## 5A. Count spaces, add one to get nr of data fields per line
      ncols = numel (strfind (str, " ")) + 1;
    else
      ## 3B. Just count delimiters.  FIXME: delimiters could occur in literals
      delimiter = varargin {idelimiter+1};
      ncols = numel (regexp (str, sprintf ("[%s]", delimiter))) + 1;
    endif
    ## 6. Reshape; watch out, we need a transpose.  Pad an incomplete last
    ##    row with NaN first so that the data fit a rectangular matrix.
    nrows = ceil (numel (varargout{1}) / ncols);
    pad = mod (numel (varargout{1}), ncols);
    if (pad > 0)
      pad = ncols - pad;
      varargout{1}(end+1 : end+pad) = NaN;
    endif
    varargout{1} = reshape (varargout{1}, ncols, nrows)';
    ## ML replaces empty values with NaNs
    varargout{1}(isnan (varargout{1})) = 0;
  endif

endfunction

%% Skip three header lines, then read three comma-delimited columns
%!test
%! f = tmpnam ();
%! d = rand (5, 3);
%! dlmwrite (f, d, "precision", "%5.2f");
%! [a, b, c] = textread (f, "%f %f %f", "delimiter", ",", "headerlines", 3);
%! unlink (f);
%! assert (a, d(4:5, 1), 1e-2);
%! assert (b, d(4:5, 2), 1e-2);
%! assert (c, d(4:5, 3), 1e-2);

%% Skip one header line; the comma is given as a literal in the format string.
%% Only the first output column is checked.
%!test
%! f = tmpnam ();
%! d = rand (7, 2);
%! dlmwrite (f, d, "precision", "%5.2f");
%! [a, b] = textread (f, "%f, %f", "headerlines", 1);
%! unlink (f);
%! assert (a, d(2:7, 1), 1e-2);

%% Test reading 2D matrix with empty format
%!test
%! f = tmpnam ();
%! d = rand (5, 2);
%! dlmwrite (f, d, "precision", "%5.2f");
%! A = textread (f, "", "headerlines", 3);
%! unlink (f);
%! assert (A, d(4:5, :), 1e-2);

%% Empty format, a single line with irregular spacing and no EOL.
%% FIXME: this test is identical to the next one; the file written here holds
%% only one line, so reading multiple lines is not actually exercised.
%!test
%! f = tmpnam ();
%! unlink (f);
%! fid = fopen (f, "w");
%! d = rand (1, 4);
%! fprintf (fid, "  %f %f   %f  %f ", d);
%! fclose (fid);
%! A = textread (f, "");
%! unlink (f);
%! assert (A, d, 1e-6);

%% Empty format, corner case = one line w/o EOL
%!test
%! f = tmpnam ();
%! unlink (f);
%! fid = fopen (f, "w");
%! d = rand (1, 4);
%! fprintf (fid, "  %f %f   %f  %f ", d);
%! fclose (fid);
%! A = textread (f, "");
%! unlink (f);
%! assert (A, d, 1e-6);

%% Empty format, a single line with one missing field (should be 0)
%!test
%! f = tmpnam ();
%! unlink (f);
%! fid = fopen (f, "w");
%! d = rand (1, 4);
%! fprintf (fid, "%f, %f, ,  %f,  %f ", d);
%! fclose (fid);
%! A = textread (f, "");
%! unlink (f);
%! assert (A, [ d(1:2) 0 d(3:4)], 1e-6);

%% Test with empty positions - ML returns 0 for empty fields
%!test
%! f = tmpnam ();
%! unlink (f);
%! fid = fopen (f, "w");
%! d = rand (1, 4);
%! fprintf (fid, ",2,,4\n5,,7,\n");
%! fclose (fid);
%! A = textread (f, "", "delimiter", ",");
%! unlink (f);
%! assert (A, [0 2 0 4; 5 0 7 0], 1e-6);

%% Another test with empty format + positions, now with more incomplete lower
%% row (must be appended with zeros to get rectangular matrix)
%!test
%! f = tmpnam ();
%! unlink (f);
%! fid = fopen (f, "w");
%! d = rand (1, 4);
%! fprintf (fid, ",2,,4\n5,\n");
%! fclose (fid);
%! A = textread (f, "", "delimiter", ",");
%! unlink (f);
%! assert (A, [0 2 0 4; 5 0 0 0], 1e-6);

%% Test input validation
%% The last three cases pass an existing file (textread.m itself) so that
%% the option checks are reached.
%!error textread ()
%!error textread (1)
%!error <arguments must be strings> textread (1, "%f")
%!error <arguments must be strings> textread ("fname", 1)
%!error <missing or illegal value for> textread (file_in_loadpath ("textread.m"), "", "headerlines")
%!error <missing or illegal value for> textread (file_in_loadpath ("textread.m"), "", "headerlines", 'hh')
%!error <character value required for> textread (file_in_loadpath ("textread.m"), "%s", "endofline", true)