comparison scripts/io/textread.m @ 20270:3e8c188b20a7

maint: Periodic merge of stable to default.
author John W. Eaton <jwe@octave.org>
date Sat, 30 May 2015 05:39:47 -0400
parents 83792dd9bcc1 8a25649b9c77
children 5fc798a9b32c
comparison
equal deleted inserted replaced
20268:e8da1c10194f 20270:3e8c188b20a7
40 ## @qcode{"@xbackslashchar{}r@xbackslashchar{}n"}. If no value is given, it 40 ## @qcode{"@xbackslashchar{}r@xbackslashchar{}n"}. If no value is given, it
41 ## will be inferred from the file. If set to @qcode{""} (empty string) EOLs 41 ## will be inferred from the file. If set to @qcode{""} (empty string) EOLs
42 ## are ignored as delimiters. 42 ## are ignored as delimiters.
43 ## @end itemize 43 ## @end itemize
44 ## 44 ##
45 ## The optional input @var{n} specifies the number of data lines to read; in 45 ## The optional input @var{n} (format repeat count) specifies the number of
46 ## this sense it differs slightly from the format repeat count in strread. 46 ## times the format string is to be used or the number of lines to be read,
47 ## whichever happens first while reading. The former is equivalent to
48 ## requesting that the data output vectors should be of length @var{n}.
49 ## Note that when reading files with format strings referring to multiple
50 ## lines, @var{n} should rather be the number of lines to be read than the
51 ## number of format string uses.
47 ## 52 ##
48 ## If the format string is empty (not just omitted) and the file contains only 53 ## If the format string is empty (not just omitted) and the file contains only
49 ## numeric data (excluding headerlines), textread will return a rectangular 54 ## numeric data (excluding headerlines), textread will return a rectangular
50 ## matrix with the number of columns matching the number of numeric fields on 55 ## matrix with the number of columns matching the number of numeric fields on
51 ## the first data line of the file. Empty fields are returned as zero values. 56 ## the first data line of the file. Empty fields are returned as zero values.
57 ##
58 ## Examples:
59 ##
60 ## @example
61 ## Assume a data file like:
62 ## 1 a 2 b
63 ## 3 c 4 d
64 ## 5 e
65 ## @end example
66 ##
67 ## @example
68 ## [a, b] = textread (f, "%f %s")
69 ## returns two columns of data, one with doubles, the other a
70 ## cellstr array:
71 ## a = [1; 2; 3; 4; 5]
72 ## b = @{"a"; "b"; "c"; "d"; "e"@}
73 ## @end example
74 ##
75 ## @example
76 ## [a, b] = textread (f, "%f %s", 3)
77 ## (read data into two columns, try to use the format string
78 ## three times)
79 ## returns
80 ## a = [1; 2; 3]
81 ## b = @{"a"; "b"; "c"@}
82 ##
83 ## @end example
84 ##
85 ## @example
86 ## With a data file like:
87 ## 1
88 ## a
89 ## 2
90 ## b
91 ##
92 ## [a, b] = textread (f, "%f %s", 2)
93 ## returns a = 1 and b = @{"a"@}; i.e., the format string is used
94 ## only once because the format string refers to 2 lines of the
95 ## data file. To obtain 2x1 data output columns, specify N = 4
96 ## (number of data lines containing all requested data) rather
97 ## than 2.
98 ## @end example
52 ## 99 ##
53 ## @seealso{strread, load, dlmread, fscanf, textscan} 100 ## @seealso{strread, load, dlmread, fscanf, textscan}
54 ## @end deftypefn 101 ## @end deftypefn
55 102
56 function varargout = textread (filename, format = "%f", varargin) 103 function varargout = textread (filename, format = "%f", varargin)
123 error ("character value required for EndOfLine"); 170 error ("character value required for EndOfLine");
124 endif 171 endif
125 else 172 else
126 ## Determine EOL from file. 173 ## Determine EOL from file.
127 ## Search for EOL candidates in the first BUFLENGTH chars 174 ## Search for EOL candidates in the first BUFLENGTH chars
175 ## FIXME: The risk of a 2-byte EOL (\r\n) being split exactly at BUFLENGTH is ignored
128 eol_srch_len = min (length (str), BUFLENGTH); 176 eol_srch_len = min (length (str), BUFLENGTH);
129 ## First try DOS (CRLF) 177 ## First try DOS (CRLF)
130 if (! isempty (strfind (str(1 : eol_srch_len), "\r\n"))) 178 if (! isempty (strfind (str(1 : eol_srch_len), "\r\n")))
131 eol_char = "\r\n"; 179 eol_char = "\r\n";
132 ## Perhaps old Macintosh? (CR) 180 ## Perhaps old Macintosh? (CR)
160 eoi = findstr (str, eol_char); 208 eoi = findstr (str, eol_char);
161 n_eoi += numel (eoi); 209 n_eoi += numel (eoi);
162 ++nblks; 210 ++nblks;
163 endif 211 endif
164 endwhile 212 endwhile
213 ## Handle case of missing or incomplete trailing EOL
214 if (! strcmp (str(end - length (eol_char) + 1 : end), eol_char))
215 eoi = [ eoi (length (str)) ];
216 ++n_eoi;
217 endif
165 ## Found EOL delimiting last requested line. Compute ptr (incl. EOL) 218 ## Found EOL delimiting last requested line. Compute ptr (incl. EOL)
166 if (isempty (eoi)) 219 if (isempty (eoi))
167 disp ("textread: format repeat count specified but no endofline found");
168 eoi_pos = nblks * BUFLENGTH + count; 220 eoi_pos = nblks * BUFLENGTH + count;
169 else 221 else
170 eoi_pos = (nblks * BUFLENGTH) + eoi(end + min (nlines, n_eoi) - n_eoi); 222 eoi_pos = (nblks * BUFLENGTH) + eoi(end + min (nlines, n_eoi) - n_eoi);
171 endif 223 endif
172 fseek (fid, st_pos, "bof"); 224 fseek (fid, st_pos, "bof");
275 %! fclose (fid); 327 %! fclose (fid);
276 %! A = textread (f, ""); 328 %! A = textread (f, "");
277 %! unlink (f); 329 %! unlink (f);
278 %! assert (A, d, 1e-6); 330 %! assert (A, d, 1e-6);
279 331
332 ## Tests with format repeat count #1
333 %!test
334 %! f = tempname ();
335 %! fid = fopen (f, "w");
336 %! fprintf (fid, "%2d %s %2d %s\n %2d %s %2d %s \n", ...
337 %! 10, "a", 20, "b", 30, "c", 40, "d");
338 %! fclose (fid);
339 %! [a, b] = textread (f, "%d %s", 1);
340 %! assert (a, int32 (10));
341 %! assert (b, {"a"});
342 %! [a, b] = textread (f, "%d %s", 2);
343 %! assert (a, int32 ([10; 20]));
344 %! assert (b, {"a"; "b"});
345 %! [a, b] = textread (f, "%d %s", 3);
346 %! assert (a, int32 ([10; 20; 30]));
347 %! assert (b, {"a"; "b"; "c"});
348 %! [a, b] = textread (f, "%d %s", 4);
349 %! assert (a, int32 ([10; 20; 30; 40]));
350 %! assert (b, {"a"; "b"; "c"; "d"});
351 %! [a, b] = textread (f, "%d %s", 5);
352 %! assert (a, int32 ([10; 20; 30; 40]));
353 %! assert (b, {"a"; "b"; "c"; "d"});
354 %! unlink (f);
355
356 ## Tests with format repeat count #2, missing last EOL
357 %!test
358 %! f = tempname ();
359 %! fid = fopen (f, "w");
360 %! fprintf (fid, "%2d %s %2d %s\n %2d %s %2d %s", ...
361 %! 10, "a", 20, "b", 30, "c", 40, "d");
362 %! fclose (fid);
363 %! [a, b] = textread (f, "%d %s", 1);
364 %! assert (a, int32 (10));
365 %! assert (b, {"a"});
366 %! [a, b] = textread (f, "%d %s", 2);
367 %! assert (a, int32 ([10; 20]));
368 %! assert (b, {"a"; "b"});
369 %! [a, b] = textread (f, "%d %s", 3);
370 %! assert (a, int32 ([10; 20; 30]));
371 %! assert (b, {"a"; "b"; "c"});
372 %! [a, b] = textread (f, "%d %s", 4);
373 %! assert (a, int32 ([10; 20; 30; 40]));
374 %! assert (b, {"a"; "b"; "c"; "d"});
375 %! [a, b] = textread (f, "%d %s", 5);
376 %! assert (a, int32 ([10; 20; 30; 40]));
377 %! assert (b, {"a"; "b"; "c"; "d"});
378 %! unlink (f);
379
380 ## Tests with format repeat count #3, incomplete last line
381 %!test
382 %! f = tempname ();
383 %! fid = fopen (f, "w");
384 %! fprintf (fid, "%2d %s %2d %s\n %2d %s %2d", ...
385 %! 10, "a", 20, "b", 30, "c", 40);
386 %! fclose (fid);
387 %! [a, b] = textread (f, "%d %s", 1);
388 %! assert (a, int32 (10));
389 %! assert (b, {"a"});
390 %! [a, b] = textread (f, "%d %s", 2);
391 %! assert (a, int32 ([10; 20]));
392 %! assert (b, {"a"; "b"});
393 %! [a, b] = textread (f, "%d %s", 3);
394 %! assert (a, int32 ([10; 20; 30]));
395 %! assert (b, {"a"; "b"; "c"});
396 %! [a, b] = textread (f, "%d %s", 4);
397 %! assert (a, int32 ([10; 20; 30; 40]));
398 %! assert (b, {"a"; "b"; "c"});
399 %! [a, b] = textread (f, "%d %s", 5);
400 %! assert (a, int32 ([10; 20; 30; 40]));
401 %! assert (b, {"a"; "b"; "c"});
402 %! unlink (f);
403
404 ## Tests with format repeat count #4, incomplete last line but with trailing EOL
405 %!test
406 %! f = tempname ();
407 %! fid = fopen (f, "w");
408 %! fprintf (fid, "%2d %s %2d %s\n %2d %s %2d\n", ...
409 %! 10, "a", 20, "b", 30, "c", 40);
410 %! fclose (fid);
411 %! [a, b] = textread (f, "%d %s", 4);
412 %! assert (a, int32 ([10; 20; 30; 40]));
413 %! assert (b, {"a"; "b"; "c"; ""});
414 #%! [a, b] = textread (f, "%d %s", 5);
415 #%! assert (a, int32 ([10; 20; 30; 40]));
416 #%! assert (b, {"a"; "b"; "c"; ""});
417 %! unlink (f);
418
419 ## Tests with format repeat count #5, nr of data lines = limiting factor
420 %!test
421 %! f = tempname ();
422 %! fid = fopen (f, "w");
423 %! fprintf (fid, "%2d\n%s\n%2dn%s", ...
424 %! 1, "a", 2, "b");
425 %! fclose (fid);
426 %! [a, b] = textread (f, "%d %s", 2);
427 %! assert (a, int32 (1));
428 %! assert (b, {"a"});
429
280 ## Read multiple lines using empty format string, missing data (should be 0) 430 ## Read multiple lines using empty format string, missing data (should be 0)
281 %!test 431 %!test
282 %! f = tempname (); 432 %! f = tempname ();
283 %! unlink (f); 433 %! unlink (f);
284 %! fid = fopen (f, "w"); 434 %! fid = fopen (f, "w");