Mercurial > octave-nkf
view scripts/io/textread.m @ 17281:bc924baa2c4e
doc: Add new @qcode macro for code samples which are quoted.
Macro handles options ("on") or properties ("position") more elegantly
than @code{"text"}.
* doc/interpreter/macros.texi: Add new @qcode macro.
* doc/interpreter/tips.txi: Add documentation about @qcode macro.
* doc/interpreter/basics.txi, doc/interpreter/container.txi,
doc/interpreter/emacs.txi, doc/interpreter/errors.txi,
doc/interpreter/eval.txi, doc/interpreter/expr.txi,
doc/interpreter/external.txi, doc/interpreter/func.txi,
doc/interpreter/grammar.txi, doc/interpreter/image.txi,
doc/interpreter/install.txi, doc/interpreter/interp.txi,
doc/interpreter/io.txi, doc/interpreter/matrix.txi,
doc/interpreter/numbers.txi, doc/interpreter/oop.txi,
doc/interpreter/package.txi, doc/interpreter/plot.txi,
doc/interpreter/quad.txi, doc/interpreter/sparse.txi,
doc/interpreter/strings.txi, doc/interpreter/system.txi,
doc/interpreter/vectorize.txi, libinterp/corefcn/balance.cc,
libinterp/corefcn/bitfcns.cc, libinterp/corefcn/cellfun.cc,
libinterp/corefcn/conv2.cc, libinterp/corefcn/data.cc,
libinterp/corefcn/debug.cc, libinterp/corefcn/defaults.cc,
libinterp/corefcn/dirfns.cc, libinterp/corefcn/dlmread.cc,
libinterp/corefcn/error.cc, libinterp/corefcn/file-io.cc,
libinterp/corefcn/find.cc, libinterp/corefcn/gammainc.cc,
libinterp/corefcn/graphics.cc, libinterp/corefcn/help.cc,
libinterp/corefcn/hex2num.cc, libinterp/corefcn/input.cc,
libinterp/corefcn/load-path.cc, libinterp/corefcn/load-save.cc,
libinterp/corefcn/ls-oct-ascii.cc, libinterp/corefcn/lu.cc,
libinterp/corefcn/luinc.cc, libinterp/corefcn/matrix_type.cc,
libinterp/corefcn/oct-hist.cc, libinterp/corefcn/pager.cc,
libinterp/corefcn/pr-output.cc, libinterp/corefcn/pt-jit.cc,
libinterp/corefcn/qz.cc, libinterp/corefcn/rand.cc,
libinterp/corefcn/regexp.cc, libinterp/corefcn/schur.cc,
libinterp/corefcn/sighandlers.cc, libinterp/corefcn/sparse.cc,
libinterp/corefcn/spparms.cc, libinterp/corefcn/str2double.cc,
libinterp/corefcn/svd.cc, libinterp/corefcn/symtab.cc,
libinterp/corefcn/syscalls.cc, libinterp/corefcn/toplev.cc,
libinterp/corefcn/tril.cc, libinterp/corefcn/typecast.cc,
libinterp/corefcn/utils.cc, libinterp/corefcn/variables.cc,
libinterp/dldfcn/__init_fltk__.cc, libinterp/dldfcn/chol.cc,
libinterp/dldfcn/colamd.cc, libinterp/dldfcn/fftw.cc, libinterp/dldfcn/qr.cc,
libinterp/dldfcn/symbfact.cc, libinterp/octave-value/ov-base.cc,
libinterp/octave-value/ov-fcn-handle.cc,
libinterp/octave-value/ov-fcn-inline.cc, libinterp/octave-value/ov-java.cc,
libinterp/octave-value/ov-range.cc, libinterp/octave-value/ov-struct.cc,
libinterp/octave-value/ov-usr-fcn.cc, libinterp/parse-tree/oct-parse.in.yy,
libinterp/parse-tree/pt-binop.cc, libinterp/parse-tree/pt-eval.cc,
libinterp/parse-tree/pt-mat.cc, scripts/@ftp/ftp.m,
scripts/deprecated/java_convert_matrix.m, scripts/deprecated/java_debug.m,
scripts/deprecated/java_unsigned_conversion.m, scripts/deprecated/shell_cmd.m,
scripts/general/dblquad.m, scripts/general/display.m,
scripts/general/genvarname.m, scripts/general/idivide.m,
scripts/general/interp1.m, scripts/general/interp2.m,
scripts/general/interp3.m, scripts/general/interpn.m, scripts/general/isa.m,
scripts/general/profexplore.m, scripts/general/profile.m,
scripts/general/quadgk.m, scripts/general/randi.m, scripts/general/structfun.m,
scripts/general/subsindex.m, scripts/general/triplequad.m,
scripts/geometry/griddata.m, scripts/geometry/griddata3.m,
scripts/geometry/griddatan.m, scripts/geometry/voronoi.m, scripts/help/help.m,
scripts/help/lookfor.m, scripts/image/cmpermute.m, scripts/image/colormap.m,
scripts/image/image.m, scripts/image/imagesc.m, scripts/image/imfinfo.m,
scripts/image/imformats.m, scripts/image/imread.m, scripts/image/imshow.m,
scripts/image/imwrite.m, scripts/image/ind2gray.m, scripts/image/lines.m,
scripts/image/rgb2ind.m, scripts/image/spinmap.m, scripts/io/dlmwrite.m,
scripts/io/strread.m, scripts/io/textread.m, scripts/io/textscan.m,
scripts/java/javaclasspath.m, scripts/java/usejava.m,
scripts/miscellaneous/bzip2.m, scripts/miscellaneous/computer.m,
scripts/miscellaneous/copyfile.m, scripts/miscellaneous/debug.m,
scripts/miscellaneous/dos.m, scripts/miscellaneous/edit.m,
scripts/miscellaneous/gzip.m, scripts/miscellaneous/license.m,
scripts/miscellaneous/mkoctfile.m, scripts/miscellaneous/movefile.m,
scripts/miscellaneous/parseparams.m, scripts/miscellaneous/unix.m,
scripts/optimization/fminbnd.m, scripts/optimization/fminsearch.m,
scripts/optimization/fminunc.m, scripts/optimization/fsolve.m,
scripts/optimization/fzero.m, scripts/optimization/glpk.m,
scripts/optimization/lsqnonneg.m, scripts/optimization/optimset.m,
scripts/optimization/pqpnonneg.m, scripts/pkg/pkg.m, scripts/plot/allchild.m,
scripts/plot/ancestor.m, scripts/plot/area.m, scripts/plot/axis.m,
scripts/plot/bar.m, scripts/plot/barh.m, scripts/plot/box.m,
scripts/plot/caxis.m, scripts/plot/cla.m, scripts/plot/clabel.m,
scripts/plot/clf.m, scripts/plot/close.m, scripts/plot/colorbar.m,
scripts/plot/daspect.m, scripts/plot/ezmesh.m, scripts/plot/ezmeshc.m,
scripts/plot/ezsurf.m, scripts/plot/ezsurfc.m, scripts/plot/findall.m,
scripts/plot/findobj.m, scripts/plot/gcbo.m, scripts/plot/gcf.m,
scripts/plot/gco.m, scripts/plot/grid.m, scripts/plot/guihandles.m,
scripts/plot/hdl2struct.m, scripts/plot/hidden.m, scripts/plot/hold.m,
scripts/plot/isonormals.m, scripts/plot/isosurface.m, scripts/plot/legend.m,
scripts/plot/mesh.m, scripts/plot/meshc.m, scripts/plot/meshz.m,
scripts/plot/newplot.m, scripts/plot/orient.m, scripts/plot/pareto.m,
scripts/plot/patch.m, scripts/plot/pbaspect.m, scripts/plot/pcolor.m,
scripts/plot/plot.m, scripts/plot/print.m,
scripts/plot/private/__add_default_menu__.m, scripts/plot/quiver.m,
scripts/plot/quiver3.m, scripts/plot/refreshdata.m, scripts/plot/saveas.m,
scripts/plot/scatter.m, scripts/plot/scatter3.m, scripts/plot/shading.m,
scripts/plot/shrinkfaces.m, scripts/plot/slice.m, scripts/plot/stem.m,
scripts/plot/stem3.m, scripts/plot/struct2hdl.m, scripts/plot/subplot.m,
scripts/plot/surf.m, scripts/plot/surfc.m, scripts/plot/surfl.m,
scripts/plot/tetramesh.m, scripts/plot/uigetfile.m, scripts/plot/uimenu.m,
scripts/plot/uiputfile.m, scripts/plot/waterfall.m, scripts/plot/whitebg.m,
scripts/plot/xlim.m, scripts/plot/ylim.m, scripts/plot/zlim.m,
scripts/polynomial/conv.m, scripts/polynomial/polyout.m,
scripts/polynomial/splinefit.m, scripts/set/ismember.m, scripts/set/powerset.m,
scripts/set/setdiff.m, scripts/set/union.m, scripts/set/unique.m,
scripts/signal/detrend.m, scripts/signal/filter2.m, scripts/signal/freqz.m,
scripts/signal/periodogram.m, scripts/signal/spectral_adf.m,
scripts/signal/spectral_xdf.m, scripts/sparse/eigs.m, scripts/sparse/svds.m,
scripts/specfun/legendre.m, scripts/special-matrix/gallery.m,
scripts/statistics/base/mean.m, scripts/statistics/base/moment.m,
scripts/statistics/tests/cor_test.m,
scripts/statistics/tests/kolmogorov_smirnov_test.m,
scripts/statistics/tests/kolmogorov_smirnov_test_2.m,
scripts/statistics/tests/kruskal_wallis_test.m,
scripts/statistics/tests/prop_test_2.m, scripts/statistics/tests/sign_test.m,
scripts/statistics/tests/t_test.m, scripts/statistics/tests/t_test_2.m,
scripts/statistics/tests/t_test_regression.m,
scripts/statistics/tests/u_test.m, scripts/statistics/tests/var_test.m,
scripts/statistics/tests/welch_test.m,
scripts/statistics/tests/wilcoxon_test.m, scripts/statistics/tests/z_test.m,
scripts/statistics/tests/z_test_2.m, scripts/strings/base2dec.m,
scripts/strings/index.m, scripts/strings/isstrprop.m,
scripts/strings/mat2str.m, scripts/strings/regexptranslate.m,
scripts/strings/rindex.m, scripts/strings/str2num.m, scripts/strings/strcat.m,
scripts/strings/strjust.m, scripts/strings/strmatch.m,
scripts/strings/validatestring.m, scripts/testfun/demo.m,
scripts/testfun/example.m, scripts/testfun/test.m, scripts/time/addtodate.m,
scripts/time/asctime.m, scripts/time/datestr.m, scripts/time/datetick.m,
scripts/time/weekday.m, scripts/ui/errordlg.m, scripts/ui/helpdlg.m,
scripts/ui/inputdlg.m, scripts/ui/listdlg.m, scripts/ui/msgbox.m,
scripts/ui/questdlg.m, scripts/ui/warndlg.m: Use new @qcode macro.
author | Rik <rik@octave.org> |
---|---|
date | Mon, 19 Aug 2013 20:46:38 -0700 |
parents | 12005245b645 |
children | 088d014a7fe2 |
line wrap: on
line source
## Copyright (C) 2009-2013 Eric Chassande-Mottin, CNRS (France)
##
## This file is part of Octave.
##
## Octave is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or (at
## your option) any later version.
##
## Octave is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Octave; see the file COPYING.  If not, see
## <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn  {Function File} {[@var{a}, @dots{}] =} textread (@var{filename})
## @deftypefnx {Function File} {[@var{a}, @dots{}] =} textread (@var{filename}, @var{format})
## @deftypefnx {Function File} {[@var{a}, @dots{}] =} textread (@var{filename}, @var{format}, @var{n})
## @deftypefnx {Function File} {[@var{a}, @dots{}] =} textread (@var{filename}, @var{format}, @var{prop1}, @var{value1}, @dots{})
## @deftypefnx {Function File} {[@var{a}, @dots{}] =} textread (@var{filename}, @var{format}, @var{n}, @var{prop1}, @var{value1}, @dots{})
## Read data from a text file.
##
## The file @var{filename} is read and parsed according to @var{format}.  The
## function behaves like @code{strread} except it works by parsing a file
## instead of a string.  See the documentation of @code{strread} for details.
##
## In addition to the options supported by @code{strread}, this function
## supports two more:
##
## @itemize
## @item @qcode{"headerlines"}:
## The first @var{value} number of lines of @var{filename} are skipped.
##
## @item @qcode{"endofline"}:
## Specify a single character or @qcode{"\r\n"}.  If no value is given, it
## will be inferred from the file.  If set to "" (empty string) EOLs are
## ignored as delimiters.
## @end itemize
##
## The optional input @var{n} specifies the number of data lines to read; in
## this sense it differs slightly from the format repeat count in strread.
##
## If the format string is empty (not: omitted) and the file contains only
## numeric data (excluding headerlines), textread will return a rectangular
## matrix with the number of columns matching the number of numeric fields on
## the first data line of the file.  Empty fields are returned as zero values.
##
## @seealso{strread, load, dlmread, fscanf, textscan}
## @end deftypefn

function varargout = textread (filename, format = "%f", varargin)

  BUFLENGTH = 4096;       # Read buffer to speed up processing @var{n}

  ## Check input
  if (nargin < 1)
    print_usage ();
  endif

  if (! ischar (filename) || ! ischar (format))
    error ("textread: FILENAME and FORMAT arguments must be strings");
  endif

  ## A leading numeric option is the number of data lines to read.  It is
  ## left in varargin on purpose: strread receives it as its repeat count.
  if (! isempty (varargin) && isnumeric (varargin{1}))
    nlines = varargin{1};
  else
    nlines = Inf;
  endif
  if (nlines < 1)
    printf ("textread: N = 0, no data read\n");
    varargout = cell (1, nargout);
    return
  endif

  ## Read file
  fid = fopen (filename, "r");
  if (fid == -1)
    error ("textread: could not open '%s' for reading", filename);
  endif

  ## Everything between fopen and fclose runs under unwind_protect so that
  ## the file handle is released on every exit path (errors raised while
  ## validating options, and the early return for an empty file).
  unwind_protect

    ## Skip header lines if requested
    headerlines = find (strcmpi (varargin, "headerlines"), 1);
    if (! isempty (headerlines))
      ## Beware of missing or wrong headerline value
      if (headerlines == numel (varargin) ...
          || ! isnumeric (varargin{headerlines + 1}))
        error ("missing or illegal value for 'headerlines'" );
      endif
      ## Avoid conveying floats to fskipl
      varargin{headerlines + 1} = round (varargin{headerlines + 1});
      ## Beware of zero valued headerline, fskipl would skip to EOF
      if (varargin{headerlines + 1} > 0)
        fskipl (fid, varargin{headerlines + 1});
        ## Option has been consumed here; do not forward it to strread
        varargin(headerlines:headerlines+1) = [];
      elseif (varargin{headerlines + 1} < 0)
        warning ("textread: negative headerline value ignored");
      endif
    endif
    ## Start of the actual data; the file is re-read from here further below
    st_pos = ftell (fid);

    ## Read a first file chunk.  Rest follows after endofline processing
    [str, count] = fscanf (fid, "%c", BUFLENGTH);
    if (isempty (str) || count < 1)
      warning ("textread: empty file");
      varargout = cell (1, nargout);
      return;
    endif

    endofline = find (strcmpi (varargin, "endofline"), 1);
    if (! isempty (endofline))
      ## 'endofline' option set by user.  Guard against a missing value too.
      if (endofline == numel (varargin) ...
          || ! ischar (varargin{endofline + 1}))
        error ("character value required for EndOfLine");
      endif
      ## Remember the user's choice; it is needed below for line counting
      ## and for the empty-format reshape.
      eol_char = varargin{endofline + 1};
    else
      ## Determine EOL from file.
      ## Search for EOL candidates in first BUFLENGTH chars
      eol_srch_len = min (length (str), BUFLENGTH);
      ## First try DOS (CRLF)
      if (! isempty (strfind (str(1 : eol_srch_len), "\r\n")))
        eol_char = "\r\n";
      ## Perhaps old Macintosh? (CR)
      elseif (! isempty (strfind (str(1 : eol_srch_len), "\r")))
        eol_char = "\r";
      ## Otherwise, use plain *nix (LF)
      else
        eol_char = "\n";
      endif
      ## Set up default endofline param value
      varargin(end+1:end+2) = {"endofline", eol_char};
    endif

    ## Now that we know what EOL looks like, we can process
    ## format_repeat_count.
    ## FIXME The below isn't ML-compatible: counts lines, not format string uses
    if (isfinite (nlines) && (nlines > 0))
      l_eol_char = length (eol_char);
      eoi = strfind (str, eol_char);
      n_eoi = length (eoi);
      nblks = 0;
      ## Nr. of chars of the previous chunk prepended to the current one
      carry = 0;
      ## Avoid slow repeated str concatenation, first seek requested end of
      ## data.  Only the EOL positions of the most recent chunk are kept.
      while (n_eoi < nlines && count == BUFLENGTH)
        [nstr, count] = fscanf (fid, "%c", BUFLENGTH);
        if (count > 0)
          ## Watch out for multichar EOL being missed across buffer boundaries
          if (l_eol_char > 1)
            str = [str(end - l_eol_char + 2 : end) nstr];
            carry = l_eol_char - 1;
          else
            str = nstr;
          endif
          eoi = strfind (str, eol_char);
          n_eoi += numel (eoi);
          ++nblks;
        endif
      endwhile
      ## Found EOL delimiting last requested line.  Compute ptr (incl. EOL)
      if (isempty (eoi))
        printf ("textread: format repeat count specified but no endofline found\n");
        eoi_pos = nblks * BUFLENGTH + count;
      else
        ## eoi holds start positions of EOLs inside the last chunk, shifted
        ## by the carried-over prefix.  Undo that shift and extend the range
        ## to the last character of a multichar EOL.
        eoi_pos = (nblks * BUFLENGTH) ...
                  + eoi(end + min (nlines, n_eoi) - n_eoi) ...
                  - carry + l_eol_char - 1;
      endif
      fseek (fid, st_pos, "bof");
      str = fscanf (fid, "%c", eoi_pos);
    else
      ## No line limit: slurp everything from the start of the data
      fseek (fid, st_pos, "bof");
      str = fread (fid, "char=>char").';
    endif

  unwind_protect_cleanup
    fclose (fid);
  end_unwind_protect

  ## Set up default whitespace param value if needed
  if (! any (strcmpi ("whitespace", varargin)))
    varargin(end+1:end+2) = {"whitespace", " \b\t"};
  endif

  ## Call strread to make it do the real work
  [varargout{1:max (nargout, 1)}] = strread (str, format, varargin {:});

  ## Hack to concatenate/reshape numeric output into 2D array (undocumented ML)
  ## In ML this only works in case of an empty format string
  if (isempty (format))
    ## Get number of fields per line.
    ## 1. Get eol_char position (literal search, EOL is not a regexp)
    iwhsp = find (strcmpi ("whitespace", varargin), 1);
    whsp = varargin{iwhsp + 1};
    idx = strfind (str, eol_char);
    ## 2. Get first data line til EOL.  Avoid corner case of just one line
    if (! isempty (idx))
      str = str(1:idx(1)-1);
    endif
    idelimiter = find (strcmpi (varargin, "delimiter"), 1);
    if (isempty (idelimiter))
      ## Assume delimiter = whitespace
      ## 3A. whitespace incl. consecutive whitespace => single space
      str = regexprep (str, sprintf ("[%s]+", whsp), ' ');
      ## 4A. Remove possible leading & trailing spaces
      str = strtrim (str);
      ## 5A. Count spaces, add one to get nr of data fields per line
      ncols = numel (strfind (str, " ")) + 1;
    else
      ## 3B. Just count delimiters.  FIXME: delimiters could occur in literals
      delimiter = varargin {idelimiter+1};
      ncols = numel (regexp (str, sprintf ("[%s]", delimiter))) + 1;
    endif
    ## 6. Reshape; watch out, we need a transpose
    nrows = ceil (numel (varargout{1}) / ncols);
    pad = mod (numel (varargout{1}), ncols);
    if (pad > 0)
      ## Incomplete last row: fill up to a rectangular matrix
      pad = ncols - pad;
      varargout{1}(end+1 : end+pad) = NaN;
    endif
    varargout{1} = reshape (varargout{1}, ncols, nrows)';
    ## Every NaN (including the padding added above) is turned into zero,
    ## as the help text promises zeros for empty fields.
    varargout{1}(isnan (varargout{1})) = 0;
  endif

endfunction


%!test
%! f = tmpnam ();
%! d = rand (5, 3);
%! dlmwrite (f, d, "precision", "%5.2f");
%! [a, b, c] = textread (f, "%f %f %f", "delimiter", ",", "headerlines", 3);
%! unlink (f);
%! assert (a, d(4:5, 1), 1e-2);
%! assert (b, d(4:5, 2), 1e-2);
%! assert (c, d(4:5, 3), 1e-2);

%!test
%! f = tmpnam ();
%! d = rand (7, 2);
%! dlmwrite (f, d, "precision", "%5.2f");
%! [a, b] = textread (f, "%f, %f", "headerlines", 1);
%! unlink (f);
%! assert (a, d(2:7, 1), 1e-2);

%% Test reading 2D matrix with empty format
%!test
%! f = tmpnam ();
%! d = rand (5, 2);
%! dlmwrite (f, d, "precision", "%5.2f");
%! A = textread (f, "", "headerlines", 3);
%! unlink (f);
%! assert (A, d(4:5, :), 1e-2);

%% Read multiple lines using empty format string
%!test
%! f = tmpnam ();
%! unlink (f);
%! fid = fopen (f, "w");
%! d = rand (1, 4);
%! fprintf (fid, " %f %f %f %f ", d);
%! fclose (fid);
%! A = textread (f, "");
%! unlink (f);
%! assert (A, d, 1e-6);

%% Empty format, corner case = one line w/o EOL
%!test
%! f = tmpnam ();
%! unlink (f);
%! fid = fopen (f, "w");
%! d = rand (1, 4);
%! fprintf (fid, " %f %f %f %f ", d);
%! fclose (fid);
%! A = textread (f, "");
%! unlink (f);
%! assert (A, d, 1e-6);

%% Read multiple lines using empty format string, missing data (should be 0)
%!test
%! f = tmpnam ();
%! unlink (f);
%! fid = fopen (f, "w");
%! d = rand (1, 4);
%! fprintf (fid, "%f, %f, , %f, %f ", d);
%! fclose (fid);
%! A = textread (f, "");
%! unlink (f);
%! assert (A, [ d(1:2) 0 d(3:4)], 1e-6);

%% Test with empty positions - ML returns 0 for empty fields
%!test
%! f = tmpnam ();
%! unlink (f);
%! fid = fopen (f, "w");
%! d = rand (1, 4);
%! fprintf (fid, ",2,,4\n5,,7,\n");
%! fclose (fid);
%! A = textread (f, "", "delimiter", ",");
%! unlink (f);
%! assert (A, [0 2 0 4; 5 0 7 0], 1e-6);

%% Another test with empty format + positions, now with more incomplete lower
%% row (must be appended with zeros to get rectangular matrix)
%!test
%! f = tmpnam ();
%! unlink (f);
%! fid = fopen (f, "w");
%! d = rand (1, 4);
%! fprintf (fid, ",2,,4\n5,\n");
%! fclose (fid);
%! A = textread (f, "", "delimiter", ",");
%! unlink (f);
%! assert (A, [0 2 0 4; 5 0 0 0], 1e-6);

%% User-supplied endofline combined with a line count N
%!test
%! f = tmpnam ();
%! fid = fopen (f, "w");
%! fprintf (fid, "1 2\n3 4\n5 6\n");
%! fclose (fid);
%! [a, b] = textread (f, "%f %f", 2, "endofline", "\n");
%! unlink (f);
%! assert (a, [1; 3]);
%! assert (b, [2; 4]);

%% Empty file: no data returned and no file handle left open
%!test
%! f = tmpnam ();
%! fid = fopen (f, "w");
%! fclose (fid);
%! nopen = numel (fopen ("all"));
%! ws = warning ("off", "all");
%! A = textread (f);
%! warning (ws);
%! unlink (f);
%! assert (isempty (A));
%! assert (numel (fopen ("all")), nopen);

%% Test input validation
%!error textread ()
%!error textread (1)
%!error <arguments must be strings> textread (1, "%f")
%!error <arguments must be strings> textread ("fname", 1)
%!error <missing or illegal value for> textread (file_in_loadpath ("textread.m"), "", "headerlines")
%!error <missing or illegal value for> textread (file_in_loadpath ("textread.m"), "", "headerlines", 'hh')
%!error <character value required for> textread (file_in_loadpath ("textread.m"), "%s", "endofline", true)