Mercurial > octave-nkf
comparison scripts/io/textread.m @ 16357:0cbe330f39a2
textscan.m, textread.m: allow reading multi-column data files with empty format + tests (bug #38317)
author | Philip Nienhuis <prnienhuis@users.sf.net> |
---|---|
date | Fri, 22 Mar 2013 17:46:04 +0100 |
parents | 9c4ac8f25a8c |
children | 12005245b645 |
comparison
equal
deleted
inserted
replaced
16356:df643a532b61 | 16357:0cbe330f39a2 |
---|---|
41 ## delimiters. | 41 ## delimiters. |
42 ## @end itemize | 42 ## @end itemize |
43 ## | 43 ## |
44 ## The optional input @var{n} specifies the number of data lines to read; in | 44 ## The optional input @var{n} specifies the number of data lines to read; in |
45 ## this sense it differs slightly from the format repeat count in strread. | 45 ## this sense it differs slightly from the format repeat count in strread. |
46 ## | |
47 ## If the format string is empty (as opposed to omitted) and the file contains only | |
48 ## numeric data (excluding headerlines), textread will return a rectangular | |
49 ## matrix with the number of columns matching the number of numeric fields on | |
50 ## the first data line of the file. Empty fields are returned as zero values. | |
46 ## | 51 ## |
47 ## @seealso{strread, load, dlmread, fscanf, textscan} | 52 ## @seealso{strread, load, dlmread, fscanf, textscan} |
48 ## @end deftypefn | 53 ## @end deftypefn |
49 | 54 |
50 function varargout = textread (filename, format = "%f", varargin) | 55 function varargout = textread (filename, format = "%f", varargin) |
172 endif | 177 endif |
173 | 178 |
174 ## Call strread to make it do the real work | 179 ## Call strread to make it do the real work |
175 [varargout{1:max (nargout, 1)}] = strread (str, format, varargin {:}); | 180 [varargout{1:max (nargout, 1)}] = strread (str, format, varargin {:}); |
176 | 181 |
182 ## Hack to concatenate/reshape numeric output into 2D array (undocumented ML) | |
183 ## In ML this only works in case of an empty format string | |
184 if (isempty (format)) | |
185 ## Get number of fields per line. | |
186 ## 1. Get eol_char position | |
187 iwhsp = find (strcmpi ("whitespace", varargin)); | |
188 whsp = varargin{iwhsp + 1}; | |
189 idx = regexp (str, eol_char, "once"); | |
190 ## 2. Get first data line up to EOL. Handle corner case of just one line (no EOL found) | |
191 if (! isempty (idx)) | |
192 str = str(1:idx-1); | |
193 endif | |
194 idelimiter = find (strcmpi (varargin, "delimiter"), 1); | |
195 if (isempty (idelimiter)) | |
196 ## Assume delimiter = whitespace | |
197 ## 3A. whitespace incl. consecutive whitespace => single space | |
198 str = regexprep (str, sprintf ("[%s]+", whsp), ' '); | |
199 ## 4A. Remove possible leading & trailing spaces | |
200 str = strtrim (str); | |
201 ## 5A. Count spaces, add one to get nr of data fields per line | |
202 ncols = numel (strfind (str, " ")) + 1; | |
203 else | |
204 ## 3B. Just count delimiters. FIXME: delimiters could occur in literals | |
205 delimiter = varargin {idelimiter+1}; | |
206 ncols = numel (regexp (str, sprintf ("[%s]", delimiter))) + 1; | |
207 endif | |
208 ## 6. Reshape; watch out, we need a transpose | |
209 nrows = ceil (numel (varargout{1}) / ncols); | |
210 pad = mod (numel (varargout{1}), ncols); | |
211 if (pad > 0) | |
212 pad = ncols - pad; | |
213 varargout{1}(end+1 : end+pad) = NaN; | |
214 endif | |
215 varargout{1} = reshape (varargout{1}, ncols, nrows)'; | |
216 ## ML returns 0 for empty values, so replace the NaNs with zeros | |
217 varargout{1}(find (isnan (varargout{1}))) = 0; | |
218 endif | |
219 | |
177 endfunction | 220 endfunction |
178 | |
179 | 221 |
180 %!test | 222 %!test |
181 %! f = tmpnam (); | 223 %! f = tmpnam (); |
182 %! d = rand (5, 3); | 224 %! d = rand (5, 3); |
183 %! dlmwrite (f, d, "precision", "%5.2f"); | 225 %! dlmwrite (f, d, "precision", "%5.2f"); |
192 %! d = rand (7, 2); | 234 %! d = rand (7, 2); |
193 %! dlmwrite (f, d, "precision", "%5.2f"); | 235 %! dlmwrite (f, d, "precision", "%5.2f"); |
194 %! [a, b] = textread (f, "%f, %f", "headerlines", 1); | 236 %! [a, b] = textread (f, "%f, %f", "headerlines", 1); |
195 %! unlink (f); | 237 %! unlink (f); |
196 %! assert (a, d(2:7, 1), 1e-2); | 238 %! assert (a, d(2:7, 1), 1e-2); |
239 | |
240 %% Test reading 2D matrix with empty format | |
241 %!test | |
242 %! f = tmpnam (); | |
243 %! d = rand (5, 2); | |
244 %! dlmwrite (f, d, "precision", "%5.2f"); | |
245 %! A = textread (f, "", "headerlines", 3); | |
246 %! unlink (f); | |
247 %! assert (A, d(4:5, :), 1e-2); | |
248 | |
249 %% Read a single line of whitespace-separated values using empty format string | |
250 %!test | |
251 %! f = tmpnam (); | |
252 %! unlink (f); | |
253 %! fid = fopen (f, "w"); | |
254 %! d = rand (1, 4); | |
255 %! fprintf (fid, " %f %f %f %f ", d); | |
256 %! fclose (fid); | |
257 %! A = textread (f, ""); | |
258 %! unlink (f); | |
259 %! assert (A, d, 1e-6); | |
260 | |
261 %% Empty format, corner case = one line w/o EOL | |
262 %!test | |
263 %! f = tmpnam (); | |
264 %! unlink (f); | |
265 %! fid = fopen (f, "w"); | |
266 %! d = rand (1, 4); | |
267 %! fprintf (fid, " %f %f %f %f ", d); | |
268 %! fclose (fid); | |
269 %! A = textread (f, ""); | |
270 %! unlink (f); | |
271 %! assert (A, d, 1e-6); | |
272 | |
273 %% Read a single line using empty format string, missing data (should be 0) | |
274 %!test | |
275 %! f = tmpnam (); | |
276 %! unlink (f); | |
277 %! fid = fopen (f, "w"); | |
278 %! d = rand (1, 4); | |
279 %! fprintf (fid, "%f, %f, , %f, %f ", d); | |
280 %! fclose (fid); | |
281 %! A = textread (f, ""); | |
282 %! unlink (f); | |
283 %! assert (A, [ d(1:2) 0 d(3:4)], 1e-6); | |
284 | |
285 %% Test with empty positions - ML returns 0 for empty fields | |
286 %!test | |
287 %! f = tmpnam (); | |
288 %! unlink (f); | |
289 %! fid = fopen (f, "w"); | |
290 %! d = rand (1, 4); | |
291 %! fprintf (fid, ",2,,4\n5,,7,\n"); | |
292 %! fclose (fid); | |
293 %! A = textread (f, "", "delimiter", ","); | |
294 %! unlink (f); | |
295 %! assert (A, [0 2 0 4; 5 0 7 0], 1e-6); | |
296 | |
297 %% Another test with empty format + empty positions, now with an incomplete last | |
298 %% row (must be padded with zeros to get a rectangular matrix) | |
299 %!test | |
300 %! f = tmpnam (); | |
301 %! unlink (f); | |
302 %! fid = fopen (f, "w"); | |
303 %! d = rand (1, 4); | |
304 %! fprintf (fid, ",2,,4\n5,\n"); | |
305 %! fclose (fid); | |
306 %! A = textread (f, "", "delimiter", ","); | |
307 %! unlink (f); | |
308 %! assert (A, [0 2 0 4; 5 0 0 0], 1e-6); | |
197 | 309 |
198 %% Test input validation | 310 %% Test input validation |
199 %!error textread () | 311 %!error textread () |
200 %!error textread (1) | 312 %!error textread (1) |
201 %!error <arguments must be strings> textread (1, "%f") | 313 %!error <arguments must be strings> textread (1, "%f") |