Mercurial > octave
changeset 28357:8aef9f5755fe
importdata.m: Properly process HEADER_ROWS input (bug #58294).
* importdata.m: Detect when user-supplied number of HEADER_ROWS differs from
auto-detected number in file; issue a warning with new ID
"Octave:importdata:headerrows_mismatch". Call dlmread with user-supplied
number of header rows (manual override). Remove header rows from list
of exceptional data to process as this has already been handled.
Add regression BIST test for bug #58294. Add new BIST test for new warning.
author | Rik <rik@octave.org> |
---|---|
date | Sat, 23 May 2020 14:13:11 -0700 |
parents | 4e4baa5ac03c |
children | 3c5304bbb71e |
files | scripts/io/importdata.m |
diffstat | 1 files changed, 54 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
--- a/scripts/io/importdata.m Sat May 23 08:52:51 2020 -0700 +++ b/scripts/io/importdata.m Sat May 23 14:13:11 2020 -0700 @@ -264,15 +264,23 @@ fclose (fid); if (num_header_rows >= 0) - header_rows = num_header_rows; + ## User has defined a number of header rows which disagrees with the + ## auto-detected number. Print a warning. + if (num_header_rows < header_rows) + warning ("Octave:importdata:headerrows_mismatch", + "importdata: detected %d header rows, but HEADER_ROWS input configured %d rows", header_rows, num_header_rows); + endif + else + ## use the automatically detected number of header rows + num_header_rows = header_rows; endif ## Now, let the efficient built-in routine do the bulk of the work. if (delimiter == " ") - output.data = dlmread (fname, "", header_rows, header_cols, + output.data = dlmread (fname, "", num_header_rows, header_cols, "emptyvalue", NA); else - output.data = dlmread (fname, delimiter, header_rows, header_cols, + output.data = dlmread (fname, delimiter, num_header_rows, header_cols, "emptyvalue", NA); endif @@ -290,8 +298,11 @@ file_content = ostrsplit (fileread (fname), "\r\n", true); na_rows = find (any (na_idx, 2)); + ## Prune text lines in header that were already collected + idx = (na_rows(1:min (header_rows, end)) + num_header_rows) <= header_rows; + na_rows(idx) = []; for ridx = na_rows(:)' - row = file_content{ridx+header_rows}; + row = file_content{ridx+num_header_rows}; if (delimiter == " ") fields = regexp (strtrim (row), ' +', 'split'); else @@ -323,6 +334,9 @@ if (all (cellfun ("isempty", output.textdata))) output = output.data; endif + if (num_header_rows != header_rows) + header_rows = num_header_rows; + endif endfunction @@ -610,6 +624,35 @@ %! assert (d, ""); %! assert (h, 3); +%!test <*58294> +%! ## Varying values of header lines field +%! fn = tempname (); +%! fid = fopen (fn, "w"); +%! fputs (fid, "header1\nheader2\n3.1\n4.2"); +%! fclose (fid); +%! 
warning ("off", "Octave:importdata:headerrows_mismatch", "local"); +%! ## Base import +%! [a, d, h] = importdata (fn, ""); +%! assert (a.data, [3.1; 4.2]); +%! assert (a.textdata, {"header1"; "header2"}); +%! assert (h, 2); +%! ## Import with 0 header lines +%! [a, d, h] = importdata (fn, "", 0); +%! assert (a.data, [NA; NA; 3.1; 4.2]); +%! assert (a.textdata, {"header1"; "header2"}); +%! assert (h, 0); +%! ## Import with 1 header lines +%! [a, d, h] = importdata (fn, "", 1); +%! assert (a.data, [NA; 3.1; 4.2]); +%! assert (a.textdata, {"header1"; "header2"}); +%! assert (h, 1); +%! ## Import with 3 header lines +%! [a, d, h] = importdata (fn, "", 3); +%! assert (a.data, [4.2]); +%! assert (a.textdata, {"header1"; "header2"; "3.1"}); +%! assert (h, 3); +%! unlink (fn); + %!error importdata () %!error importdata (1,2,3,4) %!error <FNAME must be a string> importdata (1) @@ -619,3 +662,10 @@ %!error <HEADER_ROWS must be an integer> importdata ("foo", " ", "1") %!error <HEADER_ROWS must be an integer> importdata ("foo", " ", 1.5) %!error <not implemented for file format .avi> importdata ("foo.avi") +%!warning <detected 2 header rows, but HEADER_ROWS input configured 1 rows> +%! fn = tempname (); +%! fid = fopen (fn, "w"); +%! fputs (fid, "header1\nheader2\n3.1"); +%! fclose (fid); +%! a = importdata (fn, "", 1); +%! unlink (fn);