comparison scripts/io/textread.m @ 16357:0cbe330f39a2

textscan.m, textread.m: allow reading multi-column data files with empty format + tests (bug #38317)
author Philip Nienhuis <prnienhuis@users.sf.net>
date Fri, 22 Mar 2013 17:46:04 +0100
parents 9c4ac8f25a8c
children 12005245b645
comparison
equal deleted inserted replaced
16356:df643a532b61 16357:0cbe330f39a2
41 ## delimiters. 41 ## delimiters.
42 ## @end itemize 42 ## @end itemize
43 ## 43 ##
44 ## The optional input @var{n} specifies the number of data lines to read; in 44 ## The optional input @var{n} specifies the number of data lines to read; in
45 ## this sense it differs slightly from the format repeat count in strread. 45 ## this sense it differs slightly from the format repeat count in strread.
46 ##
47 ## If the format string is empty (but not omitted) and the file contains only
48 ## numeric data (excluding headerlines), textread will return a rectangular
49 ## matrix with the number of columns matching the number of numeric fields on
50 ## the first data line of the file. Empty fields are returned as zero values.
46 ## 51 ##
47 ## @seealso{strread, load, dlmread, fscanf, textscan} 52 ## @seealso{strread, load, dlmread, fscanf, textscan}
48 ## @end deftypefn 53 ## @end deftypefn
49 54
50 function varargout = textread (filename, format = "%f", varargin) 55 function varargout = textread (filename, format = "%f", varargin)
172 endif 177 endif
173 178
174 ## Call strread to make it do the real work 179 ## Call strread to make it do the real work
175 [varargout{1:max (nargout, 1)}] = strread (str, format, varargin {:}); 180 [varargout{1:max (nargout, 1)}] = strread (str, format, varargin {:});
176 181
182 ## Hack to concatenate/reshape numeric output into a 2D array (undocumented
183 ## Matlab behavior). In Matlab this only works with an empty format string
184 if (isempty (format))
185 ## Get number of fields per line.
186 ## 1. Look up the whitespace setting, then get eol_char position
187 iwhsp = find (strcmpi ("whitespace", varargin));
188 whsp = varargin{iwhsp + 1};
189 idx = regexp (str, eol_char, "once");
190 ## 2. Get first data line until EOL. Avoid corner case of just one line
191 if (! isempty (idx))
192 str = str(1:idx-1);
193 endif
194 idelimiter = find (strcmpi (varargin, "delimiter"), 1);
195 if (isempty (idelimiter))
196 ## Assume delimiter = whitespace
197 ## 3A. whitespace incl. consecutive whitespace => single space
198 str = regexprep (str, sprintf ("[%s]+", whsp), ' ');
199 ## 4A. Remove possible leading & trailing spaces
200 str = strtrim (str);
201 ## 5A. Count spaces, add one to get nr of data fields per line
202 ncols = numel (strfind (str, " ")) + 1;
203 else
204 ## 3B. Just count delimiters. FIXME: delimiters could occur in literals
205 delimiter = varargin {idelimiter+1};
206 ncols = numel (regexp (str, sprintf ("[%s]", delimiter))) + 1;
207 endif
208 ## 6. Reshape; reshape fills column-wise, so build ncols x nrows and transpose
209 nrows = ceil (numel (varargout{1}) / ncols);
210 pad = mod (numel (varargout{1}), ncols);
211 if (pad > 0)
212 pad = ncols - pad;
213 varargout{1}(end+1 : end+pad) = NaN;
214 endif
215 varargout{1} = reshape (varargout{1}, ncols, nrows)';
216 ## ML returns zeros for empty fields, so replace the NaNs with 0
217 varargout{1}(find (isnan (varargout{1}))) = 0;
218 endif
219
177 endfunction 220 endfunction
178
179 221
180 %!test 222 %!test
181 %! f = tmpnam (); 223 %! f = tmpnam ();
182 %! d = rand (5, 3); 224 %! d = rand (5, 3);
183 %! dlmwrite (f, d, "precision", "%5.2f"); 225 %! dlmwrite (f, d, "precision", "%5.2f");
192 %! d = rand (7, 2); 234 %! d = rand (7, 2);
193 %! dlmwrite (f, d, "precision", "%5.2f"); 235 %! dlmwrite (f, d, "precision", "%5.2f");
194 %! [a, b] = textread (f, "%f, %f", "headerlines", 1); 236 %! [a, b] = textread (f, "%f, %f", "headerlines", 1);
195 %! unlink (f); 237 %! unlink (f);
196 %! assert (a, d(2:7, 1), 1e-2); 238 %! assert (a, d(2:7, 1), 1e-2);
239
240 %% Test reading 2D matrix with empty format
241 %!test
242 %! f = tmpnam ();
243 %! d = rand (5, 2);
244 %! dlmwrite (f, d, "precision", "%5.2f");
245 %! A = textread (f, "", "headerlines", 3);
246 %! unlink (f);
247 %! assert (A, d(4:5, :), 1e-2);
248
249 %% Read multiple fields from one line using empty format string
250 %!test
251 %! f = tmpnam ();
252 %! unlink (f);
253 %! fid = fopen (f, "w");
254 %! d = rand (1, 4);
255 %! fprintf (fid, " %f %f %f %f ", d);
256 %! fclose (fid);
257 %! A = textread (f, "");
258 %! unlink (f);
259 %! assert (A, d, 1e-6);
260
261 %% Empty format, corner case = one line w/o EOL
262 %!test
263 %! f = tmpnam ();
264 %! unlink (f);
265 %! fid = fopen (f, "w");
266 %! d = rand (1, 4);
267 %! fprintf (fid, " %f %f %f %f ", d);
268 %! fclose (fid);
269 %! A = textread (f, "");
270 %! unlink (f);
271 %! assert (A, d, 1e-6);
272
273 %% Read one line using empty format string, missing data (should be 0)
274 %!test
275 %! f = tmpnam ();
276 %! unlink (f);
277 %! fid = fopen (f, "w");
278 %! d = rand (1, 4);
279 %! fprintf (fid, "%f, %f, , %f, %f ", d);
280 %! fclose (fid);
281 %! A = textread (f, "");
282 %! unlink (f);
283 %! assert (A, [ d(1:2) 0 d(3:4)], 1e-6);
284
285 %% Test with empty positions - ML returns 0 for empty fields
286 %!test
287 %! f = tmpnam ();
288 %! unlink (f);
289 %! fid = fopen (f, "w");
290 %! d = rand (1, 4);
291 %! fprintf (fid, ",2,,4\n5,,7,\n");
292 %! fclose (fid);
293 %! A = textread (f, "", "delimiter", ",");
294 %! unlink (f);
295 %! assert (A, [0 2 0 4; 5 0 7 0], 1e-6);
296
297 %% Another test with empty format + positions, now with more incomplete lower
298 %% row (must be appended with zeros to get rectangular matrix)
299 %!test
300 %! f = tmpnam ();
301 %! unlink (f);
302 %! fid = fopen (f, "w");
303 %! d = rand (1, 4);
304 %! fprintf (fid, ",2,,4\n5,\n");
305 %! fclose (fid);
306 %! A = textread (f, "", "delimiter", ",");
307 %! unlink (f);
308 %! assert (A, [0 2 0 4; 5 0 0 0], 1e-6);
197 309
198 %% Test input validation 310 %% Test input validation
199 %!error textread () 311 %!error textread ()
200 %!error textread (1) 312 %!error textread (1)
201 %!error <arguments must be strings> textread (1, "%f") 313 %!error <arguments must be strings> textread (1, "%f")