comparison src/DLD-FUNCTIONS/dlmread.cc @ 10804:3d5c6b84ddaf

speed-up fixes to dlmread
author Jaroslav Hajek <highegg@gmail.com>
date Tue, 20 Jul 2010 12:50:54 +0200
parents b8d76f4be94a
children 7c542263a92a
comparison
equal deleted inserted replaced
10803:75780a2b0417 10804:3d5c6b84ddaf
1 /* 1 /*
2 2
3 Copyright (C) 2008, 2009 Jonathan Stickel 3 Copyright (C) 2008, 2009 Jonathan Stickel
4 Copyright (C) 2010 Jaroslav Hajek
4 5
5 This file is part of Octave. 6 This file is part of Octave.
6 7
7 Octave is free software; you can redistribute it and/or modify it 8 Octave is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the 9 under the terms of the GNU General Public License as published by the
27 #include <config.h> 28 #include <config.h>
28 #endif 29 #endif
29 30
30 #include <cctype> 31 #include <cctype>
31 #include <fstream> 32 #include <fstream>
33 #include <limits>
32 34
33 #include "file-ops.h" 35 #include "file-ops.h"
34 #include "lo-ieee.h" 36 #include "lo-ieee.h"
35 37
36 #include "defun-dld.h" 38 #include "defun-dld.h"
37 #include "error.h" 39 #include "error.h"
38 #include "oct-obj.h" 40 #include "oct-obj.h"
39 #include "utils.h" 41 #include "utils.h"
40 42
43 static const octave_idx_type idx_max = std::numeric_limits<octave_idx_type>::max ();
44
41 static bool 45 static bool
42 read_cell_spec (std::istream& is, unsigned long& row, unsigned long& col) 46 read_cell_spec (std::istream& is, octave_idx_type& row, octave_idx_type& col)
43 { 47 {
44 bool stat = false; 48 bool stat = false;
45 49
46 if (is.peek () == std::istream::traits_type::eof ()) 50 if (is.peek () == std::istream::traits_type::eof ())
47 stat = true; 51 stat = true;
74 return stat; 78 return stat;
75 } 79 }
76 80
77 static bool 81 static bool
78 parse_range_spec (const octave_value& range_spec, 82 parse_range_spec (const octave_value& range_spec,
79 unsigned long& rlo, unsigned long& clo, 83 octave_idx_type& rlo, octave_idx_type& clo,
80 unsigned long& rup, unsigned long& cup) 84 octave_idx_type& rup, octave_idx_type& cup)
81 { 85 {
82 bool stat = true; 86 bool stat = true;
83 87
84 if (range_spec.is_string ()) 88 if (range_spec.is_string ())
85 { 89 {
114 ch = is.get (); 118 ch = is.get ();
115 if (!is || ch != '.') 119 if (!is || ch != '.')
116 stat = false; 120 stat = false;
117 } 121 }
118 122
119 rup = ULONG_MAX - 1; 123 rup = idx_max - 1;
120 cup = ULONG_MAX - 1; 124 cup = idx_max - 1;
121 } 125 }
122 else 126 else
123 { 127 {
124 rup = rlo; 128 rup = rlo;
125 cup = clo; 129 cup = clo;
137 } 141 }
138 else if (range_spec.is_real_matrix () && range_spec.numel () == 4) 142 else if (range_spec.is_real_matrix () && range_spec.numel () == 4)
139 { 143 {
140 ColumnVector range(range_spec.vector_value ()); 144 ColumnVector range(range_spec.vector_value ());
141 // double --> unsigned int 145 // double --> unsigned int
142 rlo = static_cast<unsigned long> (range(0)); 146 rlo = static_cast<octave_idx_type> (range(0));
143 clo = static_cast<unsigned long> (range(1)); 147 clo = static_cast<octave_idx_type> (range(1));
144 rup = static_cast<unsigned long> (range(2)); 148 rup = static_cast<octave_idx_type> (range(2));
145 cup = static_cast<unsigned long> (range(3)); 149 cup = static_cast<octave_idx_type> (range(3));
146 } 150 }
147 else 151 else
148 stat = false; 152 stat = false;
149 153
150 return stat; 154 return stat;
213 if (error_state) 217 if (error_state)
214 return retval; 218 return retval;
215 } 219 }
216 220
217 // Take a subset if a range was given. 221 // Take a subset if a range was given.
218 unsigned long r0 = 0, c0 = 0, r1 = ULONG_MAX-1, c1 = ULONG_MAX-1; 222 octave_idx_type r0 = 0, c0 = 0, r1 = idx_max-1, c1 = idx_max-1;
219 if (nargin > 2) 223 if (nargin > 2)
220 { 224 {
221 if (nargin == 3) 225 if (nargin == 3)
222 { 226 {
223 if (!parse_range_spec (args (2), r0, c0, r1, c1)) 227 if (!parse_range_spec (args (2), r0, c0, r1, c1))
224 error ("dlmread: error parsing range"); 228 error ("dlmread: error parsing range");
225 } 229 }
226 else if (nargin == 4) 230 else if (nargin == 4)
227 { 231 {
228 r0 = args(2).ulong_value (); 232 r0 = args(2).idx_type_value ();
229 c0 = args(3).ulong_value (); 233 c0 = args(3).idx_type_value ();
230 234
231 if (error_state) 235 if (error_state)
232 return retval; 236 return retval;
233 } 237 }
238
239 if (r0 < 0 || c0 < 0)
240 error ("dlmread: left & top must not be negative");
234 } 241 }
235 242
236 if (!error_state) 243 if (!error_state)
237 { 244 {
238 unsigned long i = 0, j = 0, r = 1, c = 1, rmax = 0, cmax = 0; 245 octave_idx_type i = 0, j = 0, r = 1, c = 1, rmax = 0, cmax = 0;
239 246
240 Matrix rdata; 247 Matrix rdata;
241 ComplexMatrix cdata; 248 ComplexMatrix cdata;
242 249
243 bool iscmplx = false; 250 bool iscmplx = false;
244 bool sepflag = false; 251 bool sepflag = false;
245 252
246 unsigned long maxrows = r1 - r0; 253 octave_idx_type maxrows = r1 - r0;
247 254
248 std::string line; 255 std::string line;
249 256
250 // Skip the r0 leading lines as these might be a header. 257 // Skip the r0 leading lines as these might be a header.
251 for (unsigned long m = 0; m < r0; m++) 258 for (octave_idx_type m = 0; m < r0; m++)
252 getline (file, line); 259 getline (file, line);
253 r1 -= r0; 260 r1 -= r0;
261
262 std::istringstream tmp_stream;
254 263
255 // Read in the data one field at a time, growing the data matrix 264 // Read in the data one field at a time, growing the data matrix
256 // as needed. 265 // as needed.
257 while (getline (file, line)) 266 while (getline (file, line))
258 { 267 {
288 break; 297 break;
289 } 298 }
290 } 299 }
291 } 300 }
292 301
302 if (cmax == 0)
303 {
304 // Try to estimate the number of columns.
305 size_t pos1 = 0;
306 do
307 {
308 size_t pos2 = line.find_first_of (sep, pos1);
309
310 if (sepflag && pos2 != std::string::npos)
311 // Treat consecutive separators as one.
312 {
313 pos2 = line.find_first_not_of (sep, pos2);
314 if (pos2 != std::string::npos)
315 pos2 -= 1;
316 else
317 pos2 = line.length () - 1;
318 }
319
320 cmax++;
321
322 if (pos2 != std::string::npos)
323 pos1 = pos2 + 1;
324 else
325 pos1 = std::string::npos;
326
327 }
328 while (pos1 != std::string::npos);
329
330 if (iscmplx)
331 cdata.resize (rmax, cmax);
332 else
333 rdata.resize (rmax, cmax);
334 }
335
293 r = (r > i + 1 ? r : i + 1); 336 r = (r > i + 1 ? r : i + 1);
294 j = 0; 337 j = 0;
295 size_t pos1 = 0; 338 size_t pos1 = 0;
296 do 339 do
297 { 340 {
305 c = (c > j + 1 ? c : j + 1); 348 c = (c > j + 1 ? c : j + 1);
306 if (r > rmax || c > cmax) 349 if (r > rmax || c > cmax)
307 { 350 {
308 // Use resize_and_fill for the case of not-equal 351 // Use resize_and_fill for the case of not-equal
309 // length rows. 352 // length rows.
353 rmax = 2*r;
354 cmax = c;
310 if (iscmplx) 355 if (iscmplx)
311 cdata.resize (r, c, 0); 356 cdata.resize (rmax, cmax);
312 else 357 else
313 rdata.resize (r, c, 0); 358 rdata.resize (rmax, cmax);
314 rmax = r; 359 }
315 cmax = c; 360
316 } 361 tmp_stream.str (str);
317 362 tmp_stream.clear ();
318 std::istringstream tmp_stream (str); 363
319 double x = octave_read_double (tmp_stream); 364 double x = octave_read_double (tmp_stream);
320 if (tmp_stream) 365 if (tmp_stream)
321 { 366 {
322 if (tmp_stream.eof ()) 367 if (tmp_stream.eof ())
323 if (iscmplx) 368 {
324 cdata(i,j++) = x; 369 if (iscmplx)
325 else 370 cdata(i,j++) = x;
326 rdata(i,j++) = x; 371 else
372 rdata(i,j++) = x;
373 }
374 else if (std::toupper (tmp_stream.peek ()) == 'I')
375 {
376 // This is to allow pure imaginary numbers.
377 if (iscmplx)
378 cdata(i,j++) = x;
379 else
380 rdata(i,j++) = x;
381 }
327 else 382 else
328 { 383 {
329 double y = octave_read_double (tmp_stream); 384 double y = octave_read_double (tmp_stream);
330 385
331 if (!iscmplx && y != 0.) 386 if (!iscmplx && y != 0.)
357 break; 412 break;
358 413
359 i++; 414 i++;
360 } 415 }
361 416
362 if (nargin > 2) 417 if (r1 >= r)
363 { 418 r1 = r - 1;
364 if (nargin == 3) 419 if (c1 >= c)
365 { 420 c1 = c - 1;
366 if (r1 >= r) 421
367 r1 = r - 1; 422 // Now take the subset of the matrix.
368 if (c1 >= c) 423 if (iscmplx)
369 c1 = c - 1; 424 cdata = cdata.extract (0, c0, r1, c1);
370 } 425 else
371 else if (nargin == 4) 426 rdata = rdata.extract (0, c0, r1, c1);
372 {
373 // If r1 and c1 are not given, use what was found to be
374 // the maximum.
375 r1 = r - 1;
376 c1 = c - 1;
377 }
378
379 // Now take the subset of the matrix.
380 if (iscmplx)
381 {
382 cdata = cdata.extract (0, c0, r1, c1);
383 cdata.resize (r1 + 1, c1 - c0 + 1);
384 }
385 else
386 {
387 rdata = rdata.extract (0, c0, r1, c1);
388 rdata.resize (r1 + 1, c1 - c0 + 1);
389 }
390 }
391 427
392 if (iscmplx) 428 if (iscmplx)
393 retval(0) = cdata; 429 retval(0) = cdata;
394 else 430 else
395 retval(0) = rdata; 431 retval(0) = rdata;