Mercurial > octave-nkf
comparison scripts/io/textread.m @ 14565:98aaebc56d7c
2012-03-25 Philip Nienhuis <prnienhuis@users.sf.net>
* textscan.m, textread.m
Updated texinfo header (@var{n} format repeat count section)
Replaced slow fgets / str concat section by block reading
Supplied varargout in some cases to avoid unneeded errors
Improvements to coding style
* textscan.m
Moved some code upward to avoid having multiple fclose statements
author | Philip Nienhuis <prnienhuis@users.sf.net> |
---|---|
date | Fri, 30 Mar 2012 18:44:01 +0200 |
parents | df5488e46dca |
children | 1804d5422f61 |
comparison
equal
deleted
inserted
replaced
14564:a459d42bb0a6 | 14565:98aaebc56d7c |
---|---|
39 ## Specify a single character or "\r\n". If no value is given, it will be | 39 ## Specify a single character or "\r\n". If no value is given, it will be |
40 ## inferred from the file. If set to "" (empty string) EOLs are ignored as | 40 ## inferred from the file. If set to "" (empty string) EOLs are ignored as |
41 ## delimiters. | 41 ## delimiters. |
42 ## @end itemize | 42 ## @end itemize |
43 ## | 43 ## |
44 ## The optional input @var{n} specifies the number of times to use | 44 ## The optional input @var{n} specifies the number of data lines to read; in |
45 ## @var{format} when parsing, i.e., the format repeat count. | 45 ## this sense it differs slightly from the format repeat count in strread. |
46 ## | 46 ## |
47 ## @seealso{strread, load, dlmread, fscanf, textscan} | 47 ## @seealso{strread, load, dlmread, fscanf, textscan} |
48 ## @end deftypefn | 48 ## @end deftypefn |
49 | 49 |
50 function varargout = textread (filename, format = "%f", varargin) | 50 function varargout = textread (filename, format = "%f", varargin) |
51 | |
52 BUFLENGTH = 4096; # Read buffer to speed up processing @var{n} | |
51 | 53 |
52 ## Check input | 54 ## Check input |
53 if (nargin < 1) | 55 if (nargin < 1) |
54 print_usage (); | 56 print_usage (); |
55 endif | 57 endif |
56 | 58 |
57 if (! ischar (filename) || ! ischar (format)) | 59 if (! ischar (filename) || ! ischar (format)) |
58 error ("textread: FILENAME and FORMAT arguments must be strings"); | 60 error ("textread: FILENAME and FORMAT arguments must be strings"); |
61 endif | |
62 | |
63 if (! isempty (varargin) && isnumeric (varargin{1})) | |
64 nlines = varargin{1}; | |
65 else | |
66 nlines = Inf; | |
67 endif | |
68 if (nlines < 1) | |
69 printf ("textread: N = 0, no data read\n"); | |
70 varargout = cell (1, nargout); | |
71 return | |
59 endif | 72 endif |
60 | 73 |
61 ## Read file | 74 ## Read file |
62 fid = fopen (filename, "r"); | 75 fid = fopen (filename, "r"); |
63 if (fid == -1) | 76 if (fid == -1) |
69 ## Beware of zero valued headerline, fskipl would skip to EOF | 82 ## Beware of zero valued headerline, fskipl would skip to EOF |
70 if (! isempty (headerlines) && (varargin{headerlines + 1} > 0)) | 83 if (! isempty (headerlines) && (varargin{headerlines + 1} > 0)) |
71 fskipl (fid, varargin{headerlines + 1}); | 84 fskipl (fid, varargin{headerlines + 1}); |
72 varargin(headerlines:headerlines+1) = []; | 85 varargin(headerlines:headerlines+1) = []; |
73 endif | 86 endif |
74 | 87 st_pos = ftell (fid); |
75 if (! isempty (varargin) && isnumeric (varargin{1})) | |
76 nlines = varargin{1}; | |
77 else | |
78 nlines = Inf; | |
79 endif | |
80 | 88 |
81 if (isfinite (nlines) && (nlines >= 0)) | 89 ## Read a first file chunk. Rest follows after endofline processing |
82 str = tmp_str = ""; | 90 [str, count] = fscanf (fid, "%c", BUFLENGTH); |
83 n = 0; | 91 if (isempty (str) || count < 1) |
84 ## FIXME: Can this be done without slow loop? | |
85 while (ischar (tmp_str) && n++ <= nlines) | |
86 str = strcat (str, tmp_str); | |
87 tmp_str = fgets (fid); | |
88 endwhile | |
89 else | |
90 str = fread (fid, "char=>char").'; | |
91 endif | |
92 fclose (fid); | |
93 | |
94 if (isempty (str)) | |
95 warning ("textread: empty file"); | 92 warning ("textread: empty file"); |
93 varargout = cell (1, nargout); | |
96 return; | 94 return; |
97 endif | 95 endif |
98 | 96 |
99 endofline = find (strcmpi (varargin, "endofline"), 1); | 97 endofline = find (strcmpi (varargin, "endofline"), 1); |
100 if (! isempty (endofline)) | 98 if (! isempty (endofline)) |
101 ## 'endofline' option set by user. | 99 ## 'endofline' option set by user. |
102 if (! ischar (varargin{endofline + 1})); | 100 if (! ischar (varargin{endofline + 1})); |
103 error ("textread: character value required for EndOfLine"); | 101 error ("textread: character value required for EndOfLine"); |
104 endif | 102 endif |
105 else | 103 else |
106 ## Determine EOL from file. Search for EOL candidates in first 3000 chars | 104 ## Determine EOL from file. Search for EOL candidates in first BUFLENGTH chars |
107 eol_srch_len = min (length (str), 3000); | 105 eol_srch_len = min (length (str), BUFLENGTH); |
108 ## First try DOS (CRLF) | 106 ## First try DOS (CRLF) |
109 if (! isempty (strfind ("\r\n", str(1 : eol_srch_len)))) | 107 if (! isempty (strfind ("\r\n", str(1 : eol_srch_len)))) |
110 eol_char = "\r\n"; | 108 eol_char = "\r\n"; |
111 ## Perhaps old Macintosh? (CR) | 109 ## Perhaps old Macintosh? (CR) |
112 elseif (! isempty (strfind ("\r", str(1 : eol_srch_len)))) | 110 elseif (! isempty (strfind ("\r", str(1 : eol_srch_len)))) |
114 ## Otherwise, use plain UNIX (LF) | 112 ## Otherwise, use plain UNIX (LF) |
115 else | 113 else |
116 eol_char = "\n"; | 114 eol_char = "\n"; |
117 endif | 115 endif |
118 ## Set up default endofline param value | 116 ## Set up default endofline param value |
119 varargin(end+1:end+2) = {'endofline', eol_char}; | 117 varargin(end+1:end+2) = {"endofline", eol_char}; |
120 endif | 118 endif |
121 | 119 |
120 ## Now that we know what EOL looks like, we can process format_repeat_count. | |
121 ## FIXME The below isn't ML-compatible: counts lines, not format string uses | |
122 if (isfinite (nlines) && (nlines > 0)) | |
123 l_eol_char = length (eol_char); | |
124 eoi = findstr (str, eol_char); | |
125 n_eoi = length (eoi); | |
126 nblks = 0; | |
127 ## Avoid slow repeated str concatenation, first seek requested end of data | |
128 while (n_eoi < nlines && count == BUFLENGTH) | |
129 [nstr, count] = fscanf (fid, "%c", BUFLENGTH); | |
130 if (count > 0) | |
131 ## Watch out for multichar EOL being missed across buffer boundaries | |
132 if (l_eol_char > 1) | |
133 str = [str(end - length (eol_char) + 2 : end) nstr]; | |
134 else | |
135 str = nstr; | |
136 endif | |
137 eoi = findstr (str, eol_char); | |
138 n_eoi += numel (eoi); | |
139 ++nblks; | |
140 endif | |
141 endwhile | |
142 ## Found EOL delimiting last requested line. Compute ptr (incl. EOL) | |
143 if (isempty (eoi)) | |
144 printf ("textread: format repeat count specified but no endofline found\n"); | |
145 eoi_pos = nblks * BUFLENGTH + count; | |
146 else | |
147 eoi_pos = (nblks * BUFLENGTH) + eoi(end + min (nlines, n_eoi) - n_eoi); | |
148 endif | |
149 fseek (fid, st_pos, "bof"); | |
150 str = fscanf (fid, "%c", eoi_pos); | |
151 else | |
152 fseek (fid, st_pos, "bof"); | |
153 str = fread(fid, "char=>char").'; | |
154 endif | |
155 fclose (fid); | |
156 | |
122 ## Set up default whitespace param value if needed | 157 ## Set up default whitespace param value if needed |
123 if (isempty (find (strcmpi ('whitespace', varargin)))) | 158 if (isempty (find (strcmpi ("whitespace", varargin)))) |
124 varargin(end+1:end+2) = {'whitespace', " \b\t"}; | 159 varargin(end+1:end+2) = {"whitespace", " \b\t"}; |
125 endif | 160 endif |
126 | 161 |
127 ## Call strread to make it do the real work | 162 ## Call strread to make it do the real work |
128 [varargout{1:max (nargout, 1)}] = strread (str, format, varargin {:}); | 163 [varargout{1:max (nargout, 1)}] = strread (str, format, varargin {:}); |
129 | 164 |