# HG changeset patch # User Benjamin Lindner # Date 1237386194 -3600 # Node ID 278afaecddd48b39c1b639c32113db3d7df8c85c # Parent af4fa72ee2504ab057a215aadc4eb252d8a63480 fix leaving stray '\r' in stream when reading from CRLF data file * * * fix CRLF issues with text-mode reading in windows when loading ascii data diff -r af4fa72ee250 -r 278afaecddd4 src/ChangeLog --- a/src/ChangeLog Thu Mar 26 07:29:25 2009 +0100 +++ b/src/ChangeLog Wed Mar 18 15:23:14 2009 +0100 @@ -1,3 +1,21 @@ +2009-03-18 Benjamin Lindner + + * ls-oct-ascii.cc (extract_keyword): fix leaving stray '\r' in stream + when reading from CRLF data file by replacing loop with call to + read_until_newline() + +2009-03-03 Benjamin Lindner + + * ls-ascii-helper.h ls-ascii-helper.cc: New files, provide helper + functions skip_until_newline(), skip_preceeding_newline() and + read_until_newline() that take care of CR/LF handling. + * Makefile.in: add new files + * load-save.cc: Open files always in binary mode in Fload + * ls-mat-ascii.cc (get_mat_data_input_line), ls-oct-ascii.cc + (extract_keyword, read_ascii_data), ls-oct-ascii.h (extract_keyword), + ov-fcn-handle.cc, ov-fcn-inline.cc, ov-range.cc, ov-str-mat.cc + (load_ascii): Use helper functions + 2009-02-25 Marco Caliari * graphics.cc (base_properties::remove_child): Fix order of dims. 
diff -r af4fa72ee250 -r 278afaecddd4 src/Makefile.in --- a/src/Makefile.in Thu Mar 26 07:29:25 2009 +0100 +++ b/src/Makefile.in Wed Mar 18 15:23:14 2009 +0100 @@ -116,7 +116,7 @@ comment-list.h debug.h defun-dld.h defun-int.h defun.h \ dirfns.h dynamic-ld.h error.h file-io.h gripes.h help.h \ input.h lex.h load-path.h load-save.h ls-hdf5.h \ - ls-mat-ascii.h ls-mat4.h ls-mat5.h ls-oct-ascii.h \ + ls-mat-ascii.h ls-mat4.h ls-mat5.h ls-oct-ascii.h ls-ascii-helper.h \ ls-oct-binary.h ls-utils.h mex.h mexproto.h oct-errno.h \ oct-fstrm.h oct-hist.h oct-iostrm.h oct-map.h oct-obj.h \ oct-prcstrm.h oct-procbuf.h oct-stdstrm.h oct-stream.h \ @@ -186,7 +186,7 @@ cutils.c data.cc debug.cc defaults.cc defun.cc dirfns.cc \ dynamic-ld.cc error.cc file-io.cc graphics.cc gripes.cc \ help.cc input.cc lex.l load-path.cc load-save.cc ls-hdf5.cc \ - ls-mat-ascii.cc ls-mat4.cc ls-mat5.cc ls-oct-ascii.cc \ + ls-mat-ascii.cc ls-mat4.cc ls-mat5.cc ls-oct-ascii.cc ls-ascii-helper.cc \ ls-oct-binary.cc ls-utils.cc main.c mappers.cc matherr.c \ mex.cc oct-fstrm.cc oct-hist.cc oct-iostrm.cc oct-map.cc \ oct-obj.cc oct-prcstrm.cc oct-procbuf.cc oct-stream.cc \ diff -r af4fa72ee250 -r 278afaecddd4 src/load-save.cc --- a/src/load-save.cc Thu Mar 26 07:29:25 2009 +0100 +++ b/src/load-save.cc Wed Mar 18 15:23:14 2009 +0100 @@ -906,15 +906,12 @@ std::ios::openmode mode = std::ios::in; - if (format == LS_BINARY -#ifdef HAVE_HDF5 - || format == LS_HDF5 -#endif - || format == LS_MAT_BINARY - || format == LS_MAT5_BINARY - || format == LS_MAT7_BINARY) - mode |= std::ios::binary; - + // Open in binary mode in any case, to fix annoying bug that + // text-mode opened streams cannot be seekg'ed/tellg'ed with + // mingw32 (See http://oldwiki.mingw.org/index.php/Known%20Problems ) + // The CR/LF issues are handled in ls-ascii-helper.cc + mode |= std::ios::binary; + #ifdef HAVE_ZLIB if (use_zlib) { diff -r af4fa72ee250 -r 278afaecddd4 src/ls-ascii-helper.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/src/ls-ascii-helper.cc Wed Mar 18 15:23:14 2009 +0100 @@ -0,0 +1,160 @@ +/* + +Copyright (C) 2003, 2005, 2006, 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +<http://www.gnu.org/licenses/>. + +*/ + + +#include "ls-ascii-helper.h" + +#include +#include + +// Helper functions when reading from ascii files. +// These functions take care of CR/LF issues when files are opened in text-mode for reading + +// Skip characters from stream IS until a newline is reached. +// Depending on KEEP_NEWLINE, either eat newline from stream or +// keep it unread + +void +skip_until_newline( std::istream& is, bool keep_newline ) +{ + if (!is) + return; + + char c,d; + + while (is) + { + c = is.peek(); + if (c == '\n' || c == '\r') + { + // reached newline + if (keep_newline == false) + { + // eat the CR or LF character + is.get(d); + + // make sure that for binary-mode opened ascii files containing CRLF line endings + // we skip the LF after CR... + if (c == '\r' && is.peek()=='\n') + { + // yes, LF following CR, eat it... 
+ is.get(d); + } + } + + // Newline was found, and read from stream if keep_newline==false, so exit loop + break; + } + else + // no newline character peeked, so read it and proceed to next character + is.get(d); + } + + return; +} + + +// If stream IS currently points to a newline (a leftover from a previous read) +// then eat newline(s) until a non-newline character is found + +void +skip_preceeding_newline( std::istream& is ) +{ + if (!is) + return; + + char c,d; + + // Check if IS currently points to newline character + c = is.peek(); + if (c == '\n' || c == '\r') + { + // Yes, at newline + do { + // eat the CR or LF character + is.get(d); + + // make sure that for binary-mode opened ascii files containing CRLF line endings + // we skip the LF after CR... + if (c == '\r' && is.peek() == '\n') + { + // yes, LF following CR, eat it... + is.get(d); + } + + // Peek into next character + c = is.peek(); + // Loop while still a newline ahead + } while( c == '\n' || c == '\r' ); + } + + return; +} + + +// Read characters from stream IS until a newline is reached. +// Depending on KEEP_NEWLINE, either eat newline from stream or +// keep it unread +// Characters read are stored and returned as std::string + +std::string +read_until_newline( std::istream& is, bool keep_newline ) +{ + if (!is) + return std::string(); + + char c,d; + std::ostringstream buf; + + while (is) + { + c = is.peek(); + if (c == '\n' || c == '\r') + { + // reached newline + if (keep_newline == false) + { + // eat the CR or LF character + is.get(d); + + // make sure that for binary-mode opened ascii files containing CRLF line endings + // we skip the LF after CR... + if (c == '\r' && is.peek() == '\n') + { + // yes, LF following CR, eat it... 
+ is.get(d); + } + } + + // Newline was found, and read from stream if keep_newline==false, so exit loop + break; + } + else + { + // no newline character peeked, so read it, store it, and proceed to next + is.get(d); + buf << d; + } + } + + return buf.str(); +} diff -r af4fa72ee250 -r 278afaecddd4 src/ls-ascii-helper.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/ls-ascii-helper.h Wed Mar 18 15:23:14 2009 +0100 @@ -0,0 +1,40 @@ +/* + +Copyright (C) 2003, 2005, 2006, 2007 John W. Eaton + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +<http://www.gnu.org/licenses/>. 
+ +*/ + +#if !defined (octave_ls_ascii_helper_h) +#define octave_ls_ascii_helper_h 1 + +#include +#include + +#include "oct-dlldefs.h" + +extern OCTINTERP_API void +skip_until_newline( std::istream& is, bool keep_newline = false ); + +extern OCTINTERP_API void +skip_preceeding_newline( std::istream& is ); + +extern OCTINTERP_API std::string +read_until_newline( std::istream& is, bool keep_newline = false ); + +#endif // !defined (octave_ls_ascii_helper_h) diff -r af4fa72ee250 -r 278afaecddd4 src/ls-mat-ascii.cc --- a/src/ls-mat-ascii.cc Thu Mar 26 07:29:25 2009 +0100 +++ b/src/ls-mat-ascii.cc Wed Mar 18 15:23:14 2009 +0100 @@ -65,6 +65,7 @@ #include "dMatrix.h" #include "ls-mat-ascii.h" +#include "ls-ascii-helper.h" static std::string get_mat_data_input_line (std::istream& is) @@ -81,14 +82,16 @@ while (is.get (c)) { if (c == '\n' || c == '\r') - break; + { + // Let skip_until_newline handle CR/LF issues... + skip_until_newline (is, false); + break; + } if (c == '%' || c == '#') { // skip to end of line - while (is.get (c)) - if (c == '\n' || c == '\r') - break; + skip_until_newline (is, false); break; } diff -r af4fa72ee250 -r 278afaecddd4 src/ls-oct-ascii.cc --- a/src/ls-oct-ascii.cc Thu Mar 26 07:29:25 2009 +0100 +++ b/src/ls-oct-ascii.cc Wed Mar 18 15:23:14 2009 +0100 @@ -108,14 +108,8 @@ while (is.get (c) && (c == ' ' || c == '\t' || c == ':')) ; // Skip whitespace and the colon. 
- if (c != '\n' && c != '\r') - { - value << c; - while (is.get (c) && c != '\n' && c != '\r') - value << c; - } - - retval = value.str (); + is.putback(c); + retval = read_until_newline (is, false); break; } else if (next_only) diff -r af4fa72ee250 -r 278afaecddd4 src/ls-oct-ascii.h --- a/src/ls-oct-ascii.h Thu Mar 26 07:29:25 2009 +0100 +++ b/src/ls-oct-ascii.h Wed Mar 18 15:23:14 2009 +0100 @@ -29,6 +29,7 @@ #include #include "str-vec.h" +#include "ls-ascii-helper.h" // Flag for cell elements #define CELL_ELT_TAG "" @@ -103,8 +104,8 @@ is >> value; if (is) status = true; - while (is.get (c) && c != '\n' && c != '\r') - ; // Skip to beginning of next line; + // Skip to beginning of next line; + skip_until_newline (is, false); break; } else if (next_only) @@ -165,8 +166,8 @@ is >> value; if (is) status = true; - while (is.get (c) && c != '\n' && c != '\r') - ; // Skip to beginning of next line; + // Skip to beginning of next line; + skip_until_newline (is, false); return status; } } diff -r af4fa72ee250 -r 278afaecddd4 src/ov-fcn-handle.cc --- a/src/ov-fcn-handle.cc Thu Mar 26 07:29:25 2009 +0100 +++ b/src/ov-fcn-handle.cc Wed Mar 18 15:23:14 2009 +0100 @@ -56,6 +56,7 @@ #include "ls-oct-binary.h" #include "ls-hdf5.h" #include "ls-utils.h" +#include "ls-ascii-helper.h" DEFINE_OCTAVE_ALLOCATOR (octave_fcn_handle); @@ -330,26 +331,18 @@ { octave_idx_type len = 0; char c; - std::ostringstream buf; + std::string buf; // Skip preceeding newline(s). - while (is.get (c) && c == '\n') - /* do nothing */; + skip_preceeding_newline (is); if (is) { - buf << c; // Get a line of text whitespace characters included, leaving // newline in the stream. + buf = read_until_newline (is, true); - while (is.peek () != '\n') - { - is.get (c); - if (! 
is) - break; - buf << c; - } } pos = is.tellg (); @@ -408,7 +401,7 @@ int parse_status; octave_value anon_fcn_handle = - eval_string (buf.str (), true, parse_status); + eval_string (buf, true, parse_status); if (parse_status == 0) { diff -r af4fa72ee250 -r 278afaecddd4 src/ov-fcn-inline.cc --- a/src/ov-fcn-inline.cc Thu Mar 26 07:29:25 2009 +0100 +++ b/src/ov-fcn-inline.cc Wed Mar 18 15:23:14 2009 +0100 @@ -47,6 +47,7 @@ #include "ls-oct-ascii.h" #include "ls-hdf5.h" #include "ls-utils.h" +#include "ls-ascii-helper.h" DEFINE_OCTAVE_ALLOCATOR (octave_fcn_inline); @@ -139,27 +140,20 @@ nm = ""; char c; - std::ostringstream buf; + std::string buf; // Skip preceeding newline(s) - while (is.get (c) && c == '\n'); + skip_preceeding_newline (is); if (is) { - buf << c; // Get a line of text whitespace characters included, leaving // newline in the stream - while (is.peek () != '\n') - { - is.get (c); - if (! is) - break; - buf << c; - } + buf = read_until_newline (is, true); } - iftext = buf.str (); + iftext = buf; octave_fcn_inline tmp (iftext, ifargs, nm); fcn = tmp.fcn; diff -r af4fa72ee250 -r 278afaecddd4 src/ov-range.cc --- a/src/ov-range.cc Thu Mar 26 07:29:25 2009 +0100 +++ b/src/ov-range.cc Wed Mar 18 15:23:14 2009 +0100 @@ -41,6 +41,7 @@ #include "byte-swap.h" #include "ls-hdf5.h" #include "ls-utils.h" +#include "ls-ascii-helper.h" DEFINE_OCTAVE_ALLOCATOR (octave_range); @@ -274,14 +275,9 @@ break; } - for (;;) - { - if (is && (c == '%' || c == '#')) - while (is.get (c) && c != '\n') - ; // Skip to beginning of next line, ignoring everything. - else - break; - } + // Skip to beginning of next line, ignoring everything. 
+ skip_until_newline (is, false); + } bool diff -r af4fa72ee250 -r 278afaecddd4 src/ov-str-mat.cc --- a/src/ov-str-mat.cc Thu Mar 26 07:29:25 2009 +0100 +++ b/src/ov-str-mat.cc Wed Mar 18 15:23:14 2009 +0100 @@ -48,6 +48,7 @@ #include "pr-output.h" #include "pt-mat.h" #include "utils.h" +#include "ls-ascii-helper.h" DEFINE_OCTAVE_ALLOCATOR (octave_char_matrix_str); DEFINE_OCTAVE_ALLOCATOR (octave_char_matrix_sq_str); @@ -344,8 +345,7 @@ char *ftmp = tmp.fortran_vec (); // Skip the return line - if (! is.read (ftmp, 1)) - return false; + skip_preceeding_newline (is); if (! is.read (ftmp, dv.numel ()) || !is) {