comparison scripts/io/textread.m @ 14565:98aaebc56d7c

2012-03-25 Philip Nienhuis <prnienhuis@users.sf.net> * textscan.m, textread.m: Updated texinfo header (@var{n} format repeat count section). Replaced slow fgets / str concat section by block reading. Supplied varargout in some cases to avoid unneeded errors. Improvements to coding style. * textscan.m: Moved some code upward to avoid having multiple fclose statements.
author Philip Nienhuis <prnienhuis@users.sf.net>
date Fri, 30 Mar 2012 18:44:01 +0200
parents df5488e46dca
children 1804d5422f61
comparison
equal deleted inserted replaced
14564:a459d42bb0a6 14565:98aaebc56d7c
39 ## Specify a single character or "\r\n". If no value is given, it will be 39 ## Specify a single character or "\r\n". If no value is given, it will be
40 ## inferred from the file. If set to "" (empty string) EOLs are ignored as 40 ## inferred from the file. If set to "" (empty string) EOLs are ignored as
41 ## delimiters. 41 ## delimiters.
42 ## @end itemize 42 ## @end itemize
43 ## 43 ##
44 ## The optional input @var{n} specifies the number of times to use 44 ## The optional input @var{n} specifies the number of data lines to read; in
45 ## @var{format} when parsing, i.e., the format repeat count. 45 ## this sense it differs slightly from the format repeat count in strread.
46 ## 46 ##
47 ## @seealso{strread, load, dlmread, fscanf, textscan} 47 ## @seealso{strread, load, dlmread, fscanf, textscan}
48 ## @end deftypefn 48 ## @end deftypefn
49 49
50 function varargout = textread (filename, format = "%f", varargin) 50 function varargout = textread (filename, format = "%f", varargin)
51
52 BUFLENGTH = 4096; # Read buffer to speed up processing @var{n}
51 53
52 ## Check input 54 ## Check input
53 if (nargin < 1) 55 if (nargin < 1)
54 print_usage (); 56 print_usage ();
55 endif 57 endif
56 58
57 if (! ischar (filename) || ! ischar (format)) 59 if (! ischar (filename) || ! ischar (format))
58 error ("textread: FILENAME and FORMAT arguments must be strings"); 60 error ("textread: FILENAME and FORMAT arguments must be strings");
61 endif
62
63 if (! isempty (varargin) && isnumeric (varargin{1}))
64 nlines = varargin{1};
65 else
66 nlines = Inf;
67 endif
68 if (nlines < 1)
69 printf ("textread: N = 0, no data read\n");
70 varargout = cell (1, nargout);
71 return
59 endif 72 endif
60 73
61 ## Read file 74 ## Read file
62 fid = fopen (filename, "r"); 75 fid = fopen (filename, "r");
63 if (fid == -1) 76 if (fid == -1)
69 ## Beware of zero valued headerline, fskipl would skip to EOF 82 ## Beware of zero valued headerline, fskipl would skip to EOF
70 if (! isempty (headerlines) && (varargin{headerlines + 1} > 0)) 83 if (! isempty (headerlines) && (varargin{headerlines + 1} > 0))
71 fskipl (fid, varargin{headerlines + 1}); 84 fskipl (fid, varargin{headerlines + 1});
72 varargin(headerlines:headerlines+1) = []; 85 varargin(headerlines:headerlines+1) = [];
73 endif 86 endif
74 87 st_pos = ftell (fid);
75 if (! isempty (varargin) && isnumeric (varargin{1}))
76 nlines = varargin{1};
77 else
78 nlines = Inf;
79 endif
80 88
81 if (isfinite (nlines) && (nlines >= 0)) 89 ## Read a first file chunk. Rest follows after endofline processing
82 str = tmp_str = ""; 90 [str, count] = fscanf (fid, "%c", BUFLENGTH);
83 n = 0; 91 if (isempty (str) || count < 1)
84 ## FIXME: Can this be done without slow loop?
85 while (ischar (tmp_str) && n++ <= nlines)
86 str = strcat (str, tmp_str);
87 tmp_str = fgets (fid);
88 endwhile
89 else
90 str = fread (fid, "char=>char").';
91 endif
92 fclose (fid);
93
94 if (isempty (str))
95 warning ("textread: empty file"); 92 warning ("textread: empty file");
93 varargout = cell (1, nargout);
96 return; 94 return;
97 endif 95 endif
98 96
99 endofline = find (strcmpi (varargin, "endofline"), 1); 97 endofline = find (strcmpi (varargin, "endofline"), 1);
100 if (! isempty (endofline)) 98 if (! isempty (endofline))
101 ## 'endofline' option set by user. 99 ## 'endofline' option set by user.
102 if (! ischar (varargin{endofline + 1})); 100 if (! ischar (varargin{endofline + 1}));
103 error ("textread: character value required for EndOfLine"); 101 error ("textread: character value required for EndOfLine");
104 endif 102 endif
105 else 103 else
106 ## Determine EOL from file. Search for EOL candidates in first 3000 chars 104 ## Determine EOL from file. Search for EOL candidates in first BUFLENGTH chars
107 eol_srch_len = min (length (str), 3000); 105 eol_srch_len = min (length (str), BUFLENGTH);
108 ## First try DOS (CRLF) 106 ## First try DOS (CRLF)
109 if (! isempty (strfind ("\r\n", str(1 : eol_srch_len)))) 107 if (! isempty (strfind ("\r\n", str(1 : eol_srch_len))))
110 eol_char = "\r\n"; 108 eol_char = "\r\n";
111 ## Perhaps old Macintosh? (CR) 109 ## Perhaps old Macintosh? (CR)
112 elseif (! isempty (strfind ("\r", str(1 : eol_srch_len)))) 110 elseif (! isempty (strfind ("\r", str(1 : eol_srch_len))))
114 ## Otherwise, use plain UNIX (LF) 112 ## Otherwise, use plain UNIX (LF)
115 else 113 else
116 eol_char = "\n"; 114 eol_char = "\n";
117 endif 115 endif
118 ## Set up default endofline param value 116 ## Set up default endofline param value
119 varargin(end+1:end+2) = {'endofline', eol_char}; 117 varargin(end+1:end+2) = {"endofline", eol_char};
120 endif 118 endif
121 119
120 ## Now that we know what EOL looks like, we can process format_repeat_count.
121 ## FIXME The below isn't ML-compatible: counts lines, not format string uses
122 if (isfinite (nlines) && (nlines > 0))
123 l_eol_char = length (eol_char);
124 eoi = findstr (str, eol_char);
125 n_eoi = length (eoi);
126 nblks = 0;
127 ## Avoid slow repeated str concatenation, first seek requested end of data
128 while (n_eoi < nlines && count == BUFLENGTH)
129 [nstr, count] = fscanf (fid, "%c", BUFLENGTH);
130 if (count > 0)
131 ## Watch out for multichar EOL being missed across buffer boundaries
132 if (l_eol_char > 1)
133 str = [str(end - length (eol_char) + 2 : end) nstr];
134 else
135 str = nstr;
136 endif
137 eoi = findstr (str, eol_char);
138 n_eoi += numel (eoi);
139 ++nblks;
140 endif
141 endwhile
142 ## Found EOL delimiting last requested line. Compute ptr (incl. EOL)
143 if (isempty (eoi))
144 printf ("textread: format repeat count specified but no endofline found\n");
145 eoi_pos = nblks * BUFLENGTH + count;
146 else
147 eoi_pos = (nblks * BUFLENGTH) + eoi(end + min (nlines, n_eoi) - n_eoi);
148 endif
149 fseek (fid, st_pos, "bof");
150 str = fscanf (fid, "%c", eoi_pos);
151 else
152 fseek (fid, st_pos, "bof");
153 str = fread(fid, "char=>char").';
154 endif
155 fclose (fid);
156
122 ## Set up default whitespace param value if needed 157 ## Set up default whitespace param value if needed
123 if (isempty (find (strcmpi ('whitespace', varargin)))) 158 if (isempty (find (strcmpi ("whitespace", varargin))))
124 varargin(end+1:end+2) = {'whitespace', " \b\t"}; 159 varargin(end+1:end+2) = {"whitespace", " \b\t"};
125 endif 160 endif
126 161
127 ## Call strread to make it do the real work 162 ## Call strread to make it do the real work
128 [varargout{1:max (nargout, 1)}] = strread (str, format, varargin {:}); 163 [varargout{1:max (nargout, 1)}] = strread (str, format, varargin {:});
129 164