octave-nkf: scripts/io/textread.m comparison

comparison scripts/io/textread.m @ 14565:98aaebc56d7c

2012-03-25 Philip Nienhuis <prnienhuis@users.sf.net> * textscan.m, textread.m: Updated texinfo header (@var{n} format repeat count section). Replaced slow fgets / str concat section by block reading. Supplied varargout in some cases to avoid unneeded errors. Improvements to coding style. * textscan.m: Moved some code upward to avoid having multiple fclose statements.

author	Philip Nienhuis <prnienhuis@users.sf.net>
date	Fri, 30 Mar 2012 18:44:01 +0200
parents	df5488e46dca
children	1804d5422f61

comparison

equal deleted inserted replaced

-:a459d42bb0a6
+:98aaebc56d7c
 ## Specify a single character or "\r\n".  If no value is given, it will be
 ## inferred from the file.  If set to "" (empty string) EOLs are ignored as
 ## delimiters.
 ## @end itemize
 ##
-## The optional input @var{n} specifes the number of times to use
+## The optional input @var{n} specifies the number of data lines to read; in
-## @var{format} when parsing, i.e., the format repeat count.
+## this sense it differs slightly from the format repeat count in strread.
 ##
 ## @seealso{strread, load, dlmread, fscanf, textscan}
 ## @end deftypefn
 function varargout = textread (filename, format = "%f", varargin)
+BUFLENGTH = 4096;       # Read buffer to speed up processing @var{n}
 ## Check input
 if (nargin < 1)
 print_usage ();
 endif
 if (! ischar (filename) || ! ischar (format))
 error ("textread: FILENAME and FORMAT arguments must be strings");
+endif
+if (! isempty (varargin) && isnumeric (varargin{1}))
+nlines = varargin{1};
+else
+nlines = Inf;
+endif
+if (nlines < 1)
+printf ("textread: N = 0, no data read\n");
+varargout = cell (1, nargout);
+return
 endif
 ## Read file
 fid = fopen (filename, "r");
 if (fid == -1)
 ## Beware of zero valued headerline, fskipl would skip to EOF
 if (! isempty (headerlines) && (varargin{headerlines + 1} > 0))
 fskipl (fid, varargin{headerlines + 1});
 varargin(headerlines:headerlines+1) = [];
 endif
+st_pos = ftell (fid);
-if (! isempty (varargin) && isnumeric (varargin{1}))
-nlines = varargin{1};
-else
-nlines = Inf;
-endif
-if (isfinite (nlines) && (nlines >= 0))
+## Read a first file chunk. Rest follows after endofline processing
-str = tmp_str = "";
+[str, count] = fscanf (fid, "%c", BUFLENGTH);
-n = 0;
+if (isempty (str) || count < 1)
-## FIXME: Can this be done without slow loop?
-while (ischar (tmp_str) && n++ <= nlines)
-str = strcat (str, tmp_str);
-tmp_str = fgets (fid);
-endwhile
-else
-str = fread (fid, "char=>char").';
-endif
-fclose (fid);
-if (isempty (str))
 warning ("textread: empty file");
+varargout = cell (1, nargout);
 return;
 endif
 endofline = find (strcmpi (varargin, "endofline"), 1);
 if (! isempty (endofline))
 ## 'endofline' option set by user.
 if (! ischar (varargin{endofline + 1}));
 error ("textread: character value required for EndOfLine");
 endif
 else
-## Determine EOL from file.  Search for EOL candidates in first 3000 chars
+## Determine EOL from file.  Search for EOL candidates in first BUFLENGTH chars
-eol_srch_len = min (length (str), 3000);
+eol_srch_len = min (length (str), BUFLENGTH);
 ## First try DOS (CRLF)
 if (! isempty (strfind ("\r\n", str(1 : eol_srch_len))))
 eol_char = "\r\n";
 ## Perhaps old Macintosh? (CR)
 elseif (! isempty (strfind ("\r", str(1 : eol_srch_len))))
 ## Otherwise, use plain UNIX (LF)
 else
 eol_char = "\n";
 endif
 ## Set up default endofline param value
-varargin(end+1:end+2) = {'endofline', eol_char};
+varargin(end+1:end+2) = {"endofline", eol_char};
 endif
+## Now that we know what EOL looks like, we can process format_repeat_count.
+## FIXME The below isn't ML-compatible: counts lines, not format string uses
+if (isfinite (nlines) && (nlines > 0))
+l_eol_char = length (eol_char);
+eoi = findstr (str, eol_char);
+n_eoi = length (eoi);
+nblks = 0;
+## Avoid slow repeated str concatenation, first seek requested end of data
+while (n_eoi < nlines && count == BUFLENGTH)
+[nstr, count] = fscanf (fid, "%c", BUFLENGTH);
+if (count > 0)
+## Watch out for multichar EOL being missed across buffer boundaries
+if (l_eol_char > 1)
+str = [str(end - length (eol_char) + 2 : end) nstr];
+else
+str = nstr;
+endif
+eoi = findstr (str, eol_char);
+n_eoi += numel (eoi);
+++nblks;
+endif
+endwhile
+## Found EOL delimiting last requested line. Compute ptr (incl. EOL)
+if (isempty (eoi))
+printf ("textread: format repeat count specified but no endofline found\n");
+eoi_pos = nblks * BUFLENGTH + count;
+else
+eoi_pos = (nblks * BUFLENGTH) + eoi(end + min (nlines, n_eoi) - n_eoi);
+endif
+fseek (fid, st_pos, "bof");
+str = fscanf (fid, "%c", eoi_pos);
+else
+fseek (fid, st_pos, "bof");
+str = fread(fid, "char=>char").';
+endif
+fclose (fid);
 ## Set up default whitespace param value if needed
-if (isempty (find (strcmpi ('whitespace', varargin))))
+if (isempty (find (strcmpi ("whitespace", varargin))))
-varargin(end+1:end+2) = {'whitespace', " \b\t"};
+varargin(end+1:end+2) = {"whitespace", " \b\t"};
 endif
 ## Call strread to make it do the real work
 [varargout{1:max (nargout, 1)}] = strread (str, format, varargin {:});

Mercurial > octave-nkf

comparison scripts/io/textread.m @ 14565:98aaebc56d7c