comparison scripts/io/textread.m @ 20255:481fa65e5cab stable

textread.m, textscan.m: Fix handling of format repeat count (bug #45047) * textread.m: Clarify docstring describing format repeat count argument; add examples; fix bug occurring when last read line has no trailing EOL; add tests * textscan.m: Fix bug occurring when last read line has no trailing EOL
author Philip Nienhuis <prnienhuis@users.sf.net>
date Sun, 24 May 2015 22:47:55 +0200
parents 935832827f47
children 8a25649b9c77
comparison
equal deleted inserted replaced
20251:9866b3202c52 20255:481fa65e5cab
40 ## @qcode{"@xbackslashchar{}r@xbackslashchar{}n"}. If no value is given, it 40 ## @qcode{"@xbackslashchar{}r@xbackslashchar{}n"}. If no value is given, it
41 ## will be inferred from the file. If set to @qcode{""} (empty string) EOLs 41 ## will be inferred from the file. If set to @qcode{""} (empty string) EOLs
42 ## are ignored as delimiters. 42 ## are ignored as delimiters.
43 ## @end itemize 43 ## @end itemize
44 ## 44 ##
45 ## The optional input @var{n} specifies the number of data lines to read; in 45 ## The optional input @var{n} (format repeat count) specifies the number of
46 ## this sense it differs slightly from the format repeat count in strread. 46 ## times the format string is to be used or the number of lines to be read,
47 ## whichever happens first while reading. The former is equivalent to
48 ## requesting that the data output vectors should be of length @var{n}.
49 ## Note that when reading files with format strings referring to multiple
50 ## lines, @var{n} should rather be the number of lines to be read than the
51 ## number of format string uses.
47 ## 52 ##
48 ## If the format string is empty (not just omitted) and the file contains only 53 ## If the format string is empty (not just omitted) and the file contains only
49 ## numeric data (excluding headerlines), textread will return a rectangular 54 ## numeric data (excluding headerlines), textread will return a rectangular
50 ## matrix with the number of columns matching the number of numeric fields on 55 ## matrix with the number of columns matching the number of numeric fields on
51 ## the first data line of the file. Empty fields are returned as zero values. 56 ## the first data line of the file. Empty fields are returned as zero values.
57 ##
58 ## Examples:
59 ##
60 ## @example
61 ## Assume a data file like:
62 ## 1 a 2 b
63 ## 3 c 4 d
64 ## 5 e
65 ## @end example
66 ##
67 ## @example
68 ## [a, b] = textread (f, "%f %s")
69 ## returns two columns of data, one with doubles, the other a
70 ## cellstr array:
71 ## a = [1; 2; 3; 4; 5]
72 ## b = @{"a"; "b"; "c"; "d"; "e"@}
73 ## @end example
74 ##
75 ## @example
76 ## [a, b] = textread (f, "%f %s", 3)
77 ## (read data into two columns, try to use the format string
78 ## three times)
79 ## returns
80 ## a = [1; 2; 3]
81 ## b = @{"a"; "b"; "c"@}
82 ##
83 ## @end example
84 ##
85 ## @example
86 ## With a data file like:
87 ## 1
88 ## a
89 ## 2
90 ## b
91 ##
92 ## [a, b] = textread (f, "%f %s", 2)
93 ## returns a = 1 and b = @{"a"@}; i.e., the format string is used
94 ## only once because the format string refers to 2 lines of the
95 ## data file. To obtain 2x1 data output columns, specify N = 4
96 ## (number of data lines containing all requested data) rather
97 ## than 2.
98 ## @end example
52 ## 99 ##
53 ## @seealso{strread, load, dlmread, fscanf, textscan} 100 ## @seealso{strread, load, dlmread, fscanf, textscan}
54 ## @end deftypefn 101 ## @end deftypefn
55 102
56 function varargout = textread (filename, format = "%f", varargin) 103 function varargout = textread (filename, format = "%f", varargin)
124 error ("character value required for EndOfLine"); 171 error ("character value required for EndOfLine");
125 endif 172 endif
126 else 173 else
127 ## Determine EOL from file. 174 ## Determine EOL from file.
128 ## Search for EOL candidates in the first BUFLENGTH chars 175 ## Search for EOL candidates in the first BUFLENGTH chars
176 ## FIXME: Risk of a 2-byte EOL (\r\n) being split at exactly BUFLENGTH is ignored
129 eol_srch_len = min (length (str), BUFLENGTH); 177 eol_srch_len = min (length (str), BUFLENGTH);
130 ## First try DOS (CRLF) 178 ## First try DOS (CRLF)
131 if (! isempty (strfind (str(1 : eol_srch_len), "\r\n"))) 179 if (! isempty (strfind (str(1 : eol_srch_len), "\r\n")))
132 eol_char = "\r\n"; 180 eol_char = "\r\n";
133 ## Perhaps old Macintosh? (CR) 181 ## Perhaps old Macintosh? (CR)
161 eoi = findstr (str, eol_char); 209 eoi = findstr (str, eol_char);
162 n_eoi += numel (eoi); 210 n_eoi += numel (eoi);
163 ++nblks; 211 ++nblks;
164 endif 212 endif
165 endwhile 213 endwhile
214 ## Handle case of missing or incomplete trailing EOL
215 if (! strcmp (str(end - length (eol_char) + 1 : end), eol_char))
216 eoi = [ eoi (length (str)) ];
217 ++n_eoi;
218 endif
166 ## Found EOL delimiting last requested line. Compute ptr (incl. EOL) 219 ## Found EOL delimiting last requested line. Compute ptr (incl. EOL)
167 if (isempty (eoi)) 220 if (isempty (eoi))
168 disp ("textread: format repeat count specified but no endofline found");
169 eoi_pos = nblks * BUFLENGTH + count; 221 eoi_pos = nblks * BUFLENGTH + count;
170 else 222 else
171 eoi_pos = (nblks * BUFLENGTH) + eoi(end + min (nlines, n_eoi) - n_eoi); 223 eoi_pos = (nblks * BUFLENGTH) + eoi(end + min (nlines, n_eoi) - n_eoi);
172 endif 224 endif
173 fseek (fid, st_pos, "bof"); 225 fseek (fid, st_pos, "bof");
276 %! fclose (fid); 328 %! fclose (fid);
277 %! A = textread (f, ""); 329 %! A = textread (f, "");
278 %! unlink (f); 330 %! unlink (f);
279 %! assert (A, d, 1e-6); 331 %! assert (A, d, 1e-6);
280 332
333 ## Tests with format repeat count #1
334 %!test
335 %! f = tempname ();
336 %! fid = fopen (f, "w");
337 %! fprintf (fid, "%2d %s %2d %s\n %2d %s %2d %s \n", ...
338 %! 10, "a", 20, "b", 30, "c", 40, "d");
339 %! fclose (fid);
340 %! [a, b] = textread (f, "%d %s", 1);
341 %! assert (a, int32 (10));
342 %! assert (b, {"a"});
343 %! [a, b] = textread (f, "%d %s", 2);
344 %! assert (a, int32 ([10; 20]));
345 %! assert (b, {"a"; "b"});
346 %! [a, b] = textread (f, "%d %s", 3);
347 %! assert (a, int32 ([10; 20; 30]));
348 %! assert (b, {"a"; "b"; "c"});
349 %! [a, b] = textread (f, "%d %s", 4);
350 %! assert (a, int32 ([10; 20; 30; 40]));
351 %! assert (b, {"a"; "b"; "c"; "d"});
352 %! [a, b] = textread (f, "%d %s", 5);
353 %! assert (a, int32 ([10; 20; 30; 40]));
354 %! assert (b, {"a"; "b"; "c"; "d"});
355 %! unlink (f);
356
357 ## Tests with format repeat count #2, missing last EOL
358 %!test
359 %! f = tempname ();
360 %! fid = fopen (f, "w");
361 %! fprintf (fid, "%2d %s %2d %s\n %2d %s %2d %s", ...
362 %! 10, "a", 20, "b", 30, "c", 40, "d");
363 %! fclose (fid);
364 %! [a, b] = textread (f, "%d %s", 1);
365 %! assert (a, int32 (10));
366 %! assert (b, {"a"});
367 %! [a, b] = textread (f, "%d %s", 2);
368 %! assert (a, int32 ([10; 20]));
369 %! assert (b, {"a"; "b"});
370 %! [a, b] = textread (f, "%d %s", 3);
371 %! assert (a, int32 ([10; 20; 30]));
372 %! assert (b, {"a"; "b"; "c"});
373 %! [a, b] = textread (f, "%d %s", 4);
374 %! assert (a, int32 ([10; 20; 30; 40]));
375 %! assert (b, {"a"; "b"; "c"; "d"});
376 %! [a, b] = textread (f, "%d %s", 5);
377 %! assert (a, int32 ([10; 20; 30; 40]));
378 %! assert (b, {"a"; "b"; "c"; "d"});
379 %! unlink (f);
380
381 ## Tests with format repeat count #3, incomplete last line
382 %!test
383 %! f = tempname ();
384 %! fid = fopen (f, "w");
385 %! fprintf (fid, "%2d %s %2d %s\n %2d %s %2d", ...
386 %! 10, "a", 20, "b", 30, "c", 40);
387 %! fclose (fid);
388 %! [a, b] = textread (f, "%d %s", 1);
389 %! assert (a, int32 (10));
390 %! assert (b, {"a"});
391 %! [a, b] = textread (f, "%d %s", 2);
392 %! assert (a, int32 ([10; 20]));
393 %! assert (b, {"a"; "b"});
394 %! [a, b] = textread (f, "%d %s", 3);
395 %! assert (a, int32 ([10; 20; 30]));
396 %! assert (b, {"a"; "b"; "c"});
397 %! [a, b] = textread (f, "%d %s", 4);
398 %! assert (a, int32 ([10; 20; 30; 40]));
399 %! assert (b, {"a"; "b"; "c"});
400 %! [a, b] = textread (f, "%d %s", 5);
401 %! assert (a, int32 ([10; 20; 30; 40]));
402 %! assert (b, {"a"; "b"; "c"});
403 %! unlink (f);
404
405 ## Tests with format repeat count #4, incomplete last line but with trailing EOL
406 %!test
407 %! f = tempname ();
408 %! fid = fopen (f, "w");
409 %! fprintf (fid, "%2d %s %2d %s\n %2d %s %2d\n", ...
410 %! 10, "a", 20, "b", 30, "c", 40);
411 %! fclose (fid);
412 %! [a, b] = textread (f, "%d %s", 4);
413 %! assert (a, int32 ([10; 20; 30; 40]));
414 %! assert (b, {"a"; "b"; "c"; ""});
415 #%! [a, b] = textread (f, "%d %s", 5);
416 #%! assert (a, int32 ([10; 20; 30; 40]));
417 #%! assert (b, {"a"; "b"; "c"; ""});
418 %! unlink (f);
419
420 ## Tests with format repeat count #5, nr of data lines = limiting factor
421 %!test
422 %! f = tempname ();
423 %! fid = fopen (f, "w");
424 %! fprintf (fid, "%2d\n%s\n%2dn%s", ...
425 %! 1, "a", 2, "b");
426 %! fclose (fid);
427 %! [a, b] = textread (f, "%d %s", 2);
428 %! assert (a, int32 (1));
429 %! assert (b, {"a"});
430
281 ## Read multiple lines using empty format string, missing data (should be 0) 431 ## Read multiple lines using empty format string, missing data (should be 0)
282 %!test 432 %!test
283 %! f = tempname (); 433 %! f = tempname ();
284 %! unlink (f); 434 %! unlink (f);
285 %! fid = fopen (f, "w"); 435 %! fid = fopen (f, "w");