Mercurial > octave-nkf
comparison scripts/io/textread.m @ 20255:481fa65e5cab stable
textread.m, textscan.m: Fix handling of format repeat count (bug #45047)
* textread.m: Clarify docstring describing format repeat count argument
Add examples
Fix bug occurring when last read line has no trailing EOL
Add tests
* textscan.m: Fix bug occurring when last read line has no trailing EOL
author | Philip Nienhuis <prnienhuis@users.sf.net> |
---|---|
date | Sun, 24 May 2015 22:47:55 +0200 |
parents | 935832827f47 |
children | 8a25649b9c77 |
comparison
equal
deleted
inserted
replaced
20251:9866b3202c52 | 20255:481fa65e5cab |
---|---|
40 ## @qcode{"@xbackslashchar{}r@xbackslashchar{}n"}. If no value is given, it | 40 ## @qcode{"@xbackslashchar{}r@xbackslashchar{}n"}. If no value is given, it |
41 ## will be inferred from the file. If set to @qcode{""} (empty string) EOLs | 41 ## will be inferred from the file. If set to @qcode{""} (empty string) EOLs |
42 ## are ignored as delimiters. | 42 ## are ignored as delimiters. |
43 ## @end itemize | 43 ## @end itemize |
44 ## | 44 ## |
45 ## The optional input @var{n} specifies the number of data lines to read; in | 45 ## The optional input @var{n} (format repeat count) specifies the number of |
46 ## this sense it differs slightly from the format repeat count in strread. | 46 ## times the format string is to be used or the number of lines to be read, |
47 ## whichever happens first while reading. The former is equivalent to | |
48 ## requesting that the data output vectors should be of length @var{n}. | |
49 ## Note that when reading files with format strings referring to multiple | |
50 ## lines, @var{n} should rather be the number of lines to be read than the | |
51 ## number of format string uses. | |
47 ## | 52 ## |
48 ## If the format string is empty (not just omitted) and the file contains only | 53 ## If the format string is empty (not just omitted) and the file contains only |
49 ## numeric data (excluding headerlines), textread will return a rectangular | 54 ## numeric data (excluding headerlines), textread will return a rectangular |
50 ## matrix with the number of columns matching the number of numeric fields on | 55 ## matrix with the number of columns matching the number of numeric fields on |
51 ## the first data line of the file. Empty fields are returned as zero values. | 56 ## the first data line of the file. Empty fields are returned as zero values. |
57 ## | |
58 ## Examples: | |
59 ## | |
60 ## @example | |
61 ## Assume a data file like: | |
62 ## 1 a 2 b | |
63 ## 3 c 4 d | |
64 ## 5 e | |
65 ## @end example | |
66 ## | |
67 ## @example | |
68 ## [a, b] = textread (f, "%f %s") | |
69 ## returns two columns of data, one with doubles, the other a | |
70 ## cellstr array: | |
71 ## a = [1; 2; 3; 4; 5] | |
72 ## b = {"a"; "b"; "c"; "d"; "e"} | |
73 ## @end example | |
74 ## | |
75 ## @example | |
76 ## [a, b] = textread (f, "%f %s", 3) | |
77 ## (read data into two columns, try to use the format string | |
78 ## three times) | |
79 ## returns | |
80 ## a = [1; 2; 3] | |
81 ## b = {"a"; "b"; "c"} | |
82 ## | |
83 ## @end example | |
84 ## | |
85 ## @example | |
86 ## With a data file like: | |
87 ## 1 | |
88 ## a | |
89 ## 2 | |
90 ## b | |
91 ## | |
92 ## [a, b] = textread (f, "%f %s", 2) | |
93 ## returns a = 1 and b = {"a"}; i.e., the format string is used | |
94 ## only once because the format string refers to 2 lines of the | |
95 ## data file. To obtain 2x1 data output columns, specify N = 4 | |
96 ## (number of data lines containing all requested data) rather | |
97 ## than 2. | |
98 ## @end example | |
52 ## | 99 ## |
53 ## @seealso{strread, load, dlmread, fscanf, textscan} | 100 ## @seealso{strread, load, dlmread, fscanf, textscan} |
54 ## @end deftypefn | 101 ## @end deftypefn |
55 | 102 |
56 function varargout = textread (filename, format = "%f", varargin) | 103 function varargout = textread (filename, format = "%f", varargin) |
124 error ("character value required for EndOfLine"); | 171 error ("character value required for EndOfLine"); |
125 endif | 172 endif |
126 else | 173 else |
127 ## Determine EOL from file. | 174 ## Determine EOL from file. |
128 ## Search for EOL candidates in the first BUFLENGTH chars | 175 ## Search for EOL candidates in the first BUFLENGTH chars |
176 ## FIXME Ignore risk of 2-byte EOL (\r\n) being split at exactly BUFLENGTH | |
129 eol_srch_len = min (length (str), BUFLENGTH); | 177 eol_srch_len = min (length (str), BUFLENGTH); |
130 ## First try DOS (CRLF) | 178 ## First try DOS (CRLF) |
131 if (! isempty (strfind (str(1 : eol_srch_len), "\r\n"))) | 179 if (! isempty (strfind (str(1 : eol_srch_len), "\r\n"))) |
132 eol_char = "\r\n"; | 180 eol_char = "\r\n"; |
133 ## Perhaps old Macintosh? (CR) | 181 ## Perhaps old Macintosh? (CR) |
161 eoi = findstr (str, eol_char); | 209 eoi = findstr (str, eol_char); |
162 n_eoi += numel (eoi); | 210 n_eoi += numel (eoi); |
163 ++nblks; | 211 ++nblks; |
164 endif | 212 endif |
165 endwhile | 213 endwhile |
214 ## Handle case of missing or incomplete trailing EOL | |
215 if (! strcmp (str(end - length (eol_char) + 1 : end), eol_char)) | |
216 eoi = [ eoi (length (str)) ]; | |
217 ++n_eoi; | |
218 endif | |
166 ## Found EOL delimiting last requested line. Compute ptr (incl. EOL) | 219 ## Found EOL delimiting last requested line. Compute ptr (incl. EOL) |
167 if (isempty (eoi)) | 220 if (isempty (eoi)) |
168 disp ("textread: format repeat count specified but no endofline found"); | |
169 eoi_pos = nblks * BUFLENGTH + count; | 221 eoi_pos = nblks * BUFLENGTH + count; |
170 else | 222 else |
171 eoi_pos = (nblks * BUFLENGTH) + eoi(end + min (nlines, n_eoi) - n_eoi); | 223 eoi_pos = (nblks * BUFLENGTH) + eoi(end + min (nlines, n_eoi) - n_eoi); |
172 endif | 224 endif |
173 fseek (fid, st_pos, "bof"); | 225 fseek (fid, st_pos, "bof"); |
276 %! fclose (fid); | 328 %! fclose (fid); |
277 %! A = textread (f, ""); | 329 %! A = textread (f, ""); |
278 %! unlink (f); | 330 %! unlink (f); |
279 %! assert (A, d, 1e-6); | 331 %! assert (A, d, 1e-6); |
280 | 332 |
333 ## Tests with format repeat count #1 | |
334 %!test | |
335 %! f = tempname (); | |
336 %! fid = fopen (f, "w"); | |
337 %! fprintf (fid, "%2d %s %2d %s\n %2d %s %2d %s \n", ... | |
338 %! 10, "a", 20, "b", 30, "c", 40, "d"); | |
339 %! fclose (fid); | |
340 %! [a, b] = textread (f, "%d %s", 1); | |
341 %! assert (a, int32 (10)); | |
342 %! assert (b, {"a"}); | |
343 %! [a, b] = textread (f, "%d %s", 2); | |
344 %! assert (a, int32 ([10; 20])); | |
345 %! assert (b, {"a"; "b"}); | |
346 %! [a, b] = textread (f, "%d %s", 3); | |
347 %! assert (a, int32 ([10; 20; 30])); | |
348 %! assert (b, {"a"; "b"; "c"}); | |
349 %! [a, b] = textread (f, "%d %s", 4); | |
350 %! assert (a, int32 ([10; 20; 30; 40])); | |
351 %! assert (b, {"a"; "b"; "c"; "d"}); | |
352 %! [a, b] = textread (f, "%d %s", 5); | |
353 %! assert (a, int32 ([10; 20; 30; 40])); | |
354 %! assert (b, {"a"; "b"; "c"; "d"}); | |
355 %! unlink (f); | |
356 | |
357 ## Tests with format repeat count #2, missing last EOL | |
358 %!test | |
359 %! f = tempname (); | |
360 %! fid = fopen (f, "w"); | |
361 %! fprintf (fid, "%2d %s %2d %s\n %2d %s %2d %s", ... | |
362 %! 10, "a", 20, "b", 30, "c", 40, "d"); | |
363 %! fclose (fid); | |
364 %! [a, b] = textread (f, "%d %s", 1); | |
365 %! assert (a, int32 (10)); | |
366 %! assert (b, {"a"}); | |
367 %! [a, b] = textread (f, "%d %s", 2); | |
368 %! assert (a, int32 ([10; 20])); | |
369 %! assert (b, {"a"; "b"}); | |
370 %! [a, b] = textread (f, "%d %s", 3); | |
371 %! assert (a, int32 ([10; 20; 30])); | |
372 %! assert (b, {"a"; "b"; "c"}); | |
373 %! [a, b] = textread (f, "%d %s", 4); | |
374 %! assert (a, int32 ([10; 20; 30; 40])); | |
375 %! assert (b, {"a"; "b"; "c"; "d"}); | |
376 %! [a, b] = textread (f, "%d %s", 5); | |
377 %! assert (a, int32 ([10; 20; 30; 40])); | |
378 %! assert (b, {"a"; "b"; "c"; "d"}); | |
379 %! unlink (f); | |
380 | |
381 ## Tests with format repeat count #3, incomplete last line | |
382 %!test | |
383 %! f = tempname (); | |
384 %! fid = fopen (f, "w"); | |
385 %! fprintf (fid, "%2d %s %2d %s\n %2d %s %2d", ... | |
386 %! 10, "a", 20, "b", 30, "c", 40); | |
387 %! fclose (fid); | |
388 %! [a, b] = textread (f, "%d %s", 1); | |
389 %! assert (a, int32 (10)); | |
390 %! assert (b, {"a"}); | |
391 %! [a, b] = textread (f, "%d %s", 2); | |
392 %! assert (a, int32 ([10; 20])); | |
393 %! assert (b, {"a"; "b"}); | |
394 %! [a, b] = textread (f, "%d %s", 3); | |
395 %! assert (a, int32 ([10; 20; 30])); | |
396 %! assert (b, {"a"; "b"; "c"}); | |
397 %! [a, b] = textread (f, "%d %s", 4); | |
398 %! assert (a, int32 ([10; 20; 30; 40])); | |
399 %! assert (b, {"a"; "b"; "c"}); | |
400 %! [a, b] = textread (f, "%d %s", 5); | |
401 %! assert (a, int32 ([10; 20; 30; 40])); | |
402 %! assert (b, {"a"; "b"; "c"}); | |
403 %! unlink (f); | |
404 | |
405 ## Tests with format repeat count #4, incomplete last line but with trailing EOL | |
406 %!test | |
407 %! f = tempname (); | |
408 %! fid = fopen (f, "w"); | |
409 %! fprintf (fid, "%2d %s %2d %s\n %2d %s %2d\n", ... | |
410 %! 10, "a", 20, "b", 30, "c", 40); | |
411 %! fclose (fid); | |
412 %! [a, b] = textread (f, "%d %s", 4); | |
413 %! assert (a, int32 ([10; 20; 30; 40])); | |
414 %! assert (b, {"a"; "b"; "c"; ""}); | |
415 #%! [a, b] = textread (f, "%d %s", 5); | |
416 #%! assert (a, int32 ([10; 20; 30; 40])); | |
417 #%! assert (b, {"a"; "b"; "c"; ""}); | |
418 %! unlink (f); | |
419 | |
420 ## Tests with format repeat count #5, nr of data lines = limiting factor | |
421 %!test | |
422 %! f = tempname (); | |
423 %! fid = fopen (f, "w"); | |
424 %! fprintf (fid, "%2d\n%s\n%2dn%s", ... | |
425 %! 1, "a", 2, "b"); | |
426 %! fclose (fid); | |
427 %! [a, b] = textread (f, "%d %s", 2); | |
428 %! assert (a, int32 (1)); | |
429 %! assert (b, {"a"}); | |
430 | |
281 ## Read multiple lines using empty format string, missing data (should be 0) | 431 ## Read multiple lines using empty format string, missing data (should be 0) |
282 %!test | 432 %!test |
283 %! f = tempname (); | 433 %! f = tempname (); |
284 %! unlink (f); | 434 %! unlink (f); |
285 %! fid = fopen (f, "w"); | 435 %! fid = fopen (f, "w"); |