# HG changeset patch # User Benjamin Lindner # Date 1236661310 14400 # Node ID e7e928088e9005b27ee92f5d71f78f6fe47b67ef # Parent 45f8197ffd51446896b465924810133526dbc5f1 fix CRLF issues with text-mode reading in windows when loading ascii data diff -r 45f8197ffd51 -r e7e928088e90 src/ChangeLog --- a/src/ChangeLog Mon Mar 09 17:13:58 2009 -0400 +++ b/src/ChangeLog Tue Mar 10 01:01:50 2009 -0400 @@ -1,3 +1,16 @@ +2009-03-09 Benjamin Lindner + + * ls-ascii-helper.h, ls-ascii-helper.cc: New files. + * Makefile.in: Add them to the appropriate lists. + * load-save.cc (Fload): Open all files in binary mode. + * ov-range.cc (load_ascii): Explicitly handle CR and CRLF line endings. + * ov-fcn-handle.cc (load_ascii): Likewise. + * ov-fcn-inline.cc (load_ascii): Likewise. + * ov-str-mat.cc (load_ascii): Likewise. + * ls-mat-ascii.cc (get_mat_data_input_line): Likewise. + * ls-oct-ascii.cc (extract_keyword, read_ascii_data): Likewise. + * ls-oct-ascii.h (extract_keyword): Likewise. + 2009-03-09 John W. Eaton * graphics.h.in (OCTAVE_DEFAULT_FONTNAME): New macro, defaults to "*". 
diff -r 45f8197ffd51 -r e7e928088e90 src/Makefile.in --- a/src/Makefile.in Mon Mar 09 17:13:58 2009 -0400 +++ b/src/Makefile.in Tue Mar 10 01:01:50 2009 -0400 @@ -123,7 +123,7 @@ comment-list.h debug.h defun-dld.h defun-int.h defun.h \ dirfns.h display.h dynamic-ld.h error.h file-io.h gl-render.h \ gripes.h help.h input.h lex.h load-path.h load-save.h ls-hdf5.h \ - ls-mat-ascii.h ls-mat4.h ls-mat5.h ls-oct-ascii.h \ + ls-mat-ascii.h ls-mat4.h ls-mat5.h ls-oct-ascii.h ls-ascii-helper.h \ ls-oct-binary.h ls-utils.h mex.h mexproto.h oct-errno.h \ oct-fstrm.h oct-hdf5.h oct-hist.h oct-iostrm.h oct-map.h oct-obj.h \ oct-prcstrm.h oct-procbuf.h oct-stdstrm.h oct-stream.h \ @@ -217,9 +217,10 @@ DIST_SRC := Cell.cc bitfcns.cc c-file-ptr-stream.cc comment-list.cc \ cutils.c data.cc debug.cc defaults.cc defun.cc dirfns.cc \ - display.cc dynamic-ld.cc error.cc file-io.cc gl-render.cc graphics.cc \ - gripes.cc help.cc input.cc lex.l load-path.cc load-save.cc \ - ls-hdf5.cc ls-mat-ascii.cc ls-mat4.cc ls-mat5.cc ls-oct-ascii.cc \ + display.cc dynamic-ld.cc error.cc file-io.cc gl-render.cc \ + graphics.cc gripes.cc help.cc input.cc lex.l load-path.cc \ + load-save.cc ls-hdf5.cc ls-mat-ascii.cc ls-mat4.cc \ + ls-mat5.cc ls-oct-ascii.cc ls-ascii-helper.cc \ ls-oct-binary.cc ls-utils.cc main.c mappers.cc matherr.c \ mex.cc oct-fstrm.cc oct-hist.cc oct-iostrm.cc oct-map.cc \ oct-obj.cc oct-prcstrm.cc oct-procbuf.cc oct-stream.cc \ diff -r 45f8197ffd51 -r e7e928088e90 src/load-save.cc --- a/src/load-save.cc Mon Mar 09 17:13:58 2009 -0400 +++ b/src/load-save.cc Tue Mar 10 01:01:50 2009 -0400 @@ -791,16 +791,9 @@ { i++; - std::ios::openmode mode = std::ios::in; - - if (format == LS_BINARY -#ifdef HAVE_HDF5 - || format == LS_HDF5 -#endif - || format == LS_MAT_BINARY - || format == LS_MAT5_BINARY - || format == LS_MAT7_BINARY) - mode |= std::ios::binary; + // Always open in binary mode and handle various + // line-endings explicitly. 
+ std::ios::openmode mode = std::ios::in | std::ios::binary; #ifdef HAVE_ZLIB if (use_zlib) diff -r 45f8197ffd51 -r e7e928088e90 src/ls-ascii-helper.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/ls-ascii-helper.cc Tue Mar 10 01:01:50 2009 -0400 @@ -0,0 +1,181 @@ +/* + +Copyright (C) 2009 Benjamin Lindner + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +<http://www.gnu.org/licenses/>. + +*/ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include "ls-ascii-helper.h" + +#include <iostream> +#include <sstream> + +// Helper functions when reading from ascii files. + +// These functions take care of CR/LF issues when files are opened in +// text-mode for reading. + +// Skip characters from stream IS until a newline is reached. +// Depending on KEEP_NEWLINE, either eat newline from stream or +// keep it unread. + +void +skip_until_newline (std::istream& is, bool keep_newline) +{ + if (! is) + return; + + while (is) + { + char c = is.peek (); + + if (c == '\n' || c == '\r') + { + // Reached newline. + if (! keep_newline) + { + // Eat the CR or LF character. + char d; + is.get (d); + + // Make sure that for binary-mode opened ascii files + // containing CRLF line endings we skip the LF after CR. + if (c == '\r' && is.peek () == '\n') + { + // Yes, LF following CR, eat it. + is.get (d); + } + } + + // Newline was found, and read from stream if + // keep_newline == false, so exit loop. 
+ break; + } + else + { + // No newline character peeked, so read it and proceed to next + // character. + char d; + is.get (d); + } + } +} + + +// If stream IS currently points to a newline (a leftover from a +// previous read) then eat newline(s) until a non-newline character is +// found. + +void +skip_preceeding_newline (std::istream& is) +{ + if (! is) + return; + + // Check whether IS currently points to newline character. + char c = is.peek (); + + if (c == '\n' || c == '\r') + { + // Yes, at newline. + do + { + // Eat the CR or LF character. + char d; + is.get (d); + + // Make sure that for binary-mode opened ascii files + // containing CRLF line endings we skip the LF after CR. + if (c == '\r' && is.peek () == '\n') + { + // Yes, LF following CR, eat it. + is.get (d); + } + + // Peek into next character. + c = is.peek (); + + // Loop while still a newline ahead. + } + while (c == '\n' || c == '\r'); + } +} + +// Read characters from stream IS until a newline is reached. +// Depending on KEEP_NEWLINE, either eat newline from stream or keep +// it unread. Characters read are stored and returned as +// std::string. + +std::string +read_until_newline (std::istream& is, bool keep_newline) +{ + if (! is) + return std::string (); + + std::ostringstream buf; + + while (is) + { + char c = is.peek (); + + if (c == '\n' || c == '\r') + { + // Reached newline. + if (! keep_newline) + { + // Eat the CR or LF character. + char d; + is.get (d); + + // Make sure that for binary-mode opened ascii files + // containing CRLF line endings we skip the LF after + // CR. + + if (c == '\r' && is.peek () == '\n') + { + // Yes, LF following CR, eat it. + is.get (d); + } + } + + // Newline was found, and read from stream if + // keep_newline == false, so exit loop. + break; + } + else + { + // No newline character peeked, so read it, store it, and + // proceed to next. 
+ char d; + is.get (d); + buf << d; + } + } + + return buf.str (); +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f8197ffd51 -r e7e928088e90 src/ls-ascii-helper.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/ls-ascii-helper.h Tue Mar 10 01:01:50 2009 -0400 @@ -0,0 +1,44 @@ +/* + +Copyright (C) 2009 Benjamin Lindner + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, see +<http://www.gnu.org/licenses/>. + +*/ + +#if !defined (octave_ls_ascii_helper_h) +#define octave_ls_ascii_helper_h 1 + +#include <iosfwd> +#include <string> + +extern OCTINTERP_API void +skip_until_newline (std::istream& is, bool keep_newline = false); + +extern OCTINTERP_API void +skip_preceeding_newline (std::istream& is); + +extern OCTINTERP_API std::string +read_until_newline (std::istream& is, bool keep_newline = false); + +#endif + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff -r 45f8197ffd51 -r e7e928088e90 src/ls-mat-ascii.cc --- a/src/ls-mat-ascii.cc Mon Mar 09 17:13:58 2009 -0400 +++ b/src/ls-mat-ascii.cc Tue Mar 10 01:01:50 2009 -0400 @@ -51,6 +51,8 @@ #include "gripes.h" #include "lex.h" #include "load-save.h" +#include "ls-ascii-helper.h" +#include "ls-mat-ascii.h" #include "oct-obj.h" #include "oct-map.h" #include "ov-cell.h" @@ -63,8 +65,6 @@ #include "version.h" #include "dMatrix.h" -#include "ls-mat-ascii.h" - static std::string get_mat_data_input_line (std::istream& is) { @@ -80,15 +80,14 @@ while 
(is.get (c)) { if (c == '\n' || c == '\r') - break; + { + skip_until_newline (is, false); + break; + } if (c == '%' || c == '#') { - // skip to end of line - while (is.get (c)) - if (c == '\n' || c == '\r') - break; - + skip_until_newline (is, false); break; } diff -r 45f8197ffd51 -r e7e928088e90 src/ls-oct-ascii.cc --- a/src/ls-oct-ascii.cc Mon Mar 09 17:13:58 2009 -0400 +++ b/src/ls-oct-ascii.cc Tue Mar 10 01:01:50 2009 -0400 @@ -51,6 +51,8 @@ #include "error.h" #include "gripes.h" #include "load-save.h" +#include "ls-ascii-helper.h" +#include "ls-oct-ascii.h" #include "oct-obj.h" #include "oct-map.h" #include "ov-cell.h" @@ -62,8 +64,6 @@ #include "version.h" #include "dMatrix.h" -#include "ls-oct-ascii.h" - // The number of decimal digits to use when writing ascii data. static int Vsave_precision = 16; @@ -123,10 +123,7 @@ else if (next_only) break; else - { - while (is.get (c) && c != '\n' && c != '\r') - ; // Skip to end of line. - } + skip_until_newline (is, false); } } diff -r 45f8197ffd51 -r e7e928088e90 src/ls-oct-ascii.h --- a/src/ls-oct-ascii.h Mon Mar 09 17:13:58 2009 -0400 +++ b/src/ls-oct-ascii.h Tue Mar 10 01:01:50 2009 -0400 @@ -30,6 +30,8 @@ #include "str-vec.h" +#include "ls-ascii-helper.h" + // Flag for cell elements #define CELL_ELT_TAG "" @@ -103,8 +105,7 @@ is >> value; if (is) status = true; - while (is.get (c) && c != '\n' && c != '\r') - ; // Skip to beginning of next line; + skip_until_newline (is, false); break; } else if (next_only) @@ -165,8 +166,7 @@ is >> value; if (is) status = true; - while (is.get (c) && c != '\n' && c != '\r') - ; // Skip to beginning of next line; + skip_until_newline (is, false); return status; } } diff -r 45f8197ffd51 -r e7e928088e90 src/ov-fcn-handle.cc --- a/src/ov-fcn-handle.cc Mon Mar 09 17:13:58 2009 -0400 +++ b/src/ov-fcn-handle.cc Tue Mar 10 01:01:50 2009 -0400 @@ -55,9 +55,10 @@ #include "oct-env.h" #include "byte-swap.h" +#include "ls-ascii-helper.h" +#include "ls-hdf5.h" #include "ls-oct-ascii.h" 
#include "ls-oct-binary.h" -#include "ls-hdf5.h" #include "ls-utils.h" DEFINE_OCTAVE_ALLOCATOR (octave_fcn_handle); @@ -297,34 +298,25 @@ if (nm == "@") { - octave_idx_type len = 0; - char c; - std::ostringstream buf; + skip_preceeding_newline (is); - // Skip preceeding newline(s). - while (is.get (c) && c == '\n') - /* do nothing */; + std::string buf; if (is) { - buf << c; // Get a line of text whitespace characters included, leaving // newline in the stream. + buf = read_until_newline (is, true); - while (is.peek () != '\n') - { - is.get (c); - if (! is) - break; - buf << c; - } } pos = is.tellg (); symbol_table::scope_id local_scope = symbol_table::alloc_scope (); + octave_idx_type len = 0; + if (extract_keyword (is, "length", len, true) && len >= 0) { if (len > 0) @@ -363,7 +355,7 @@ int parse_status; octave_value anon_fcn_handle = - eval_string (buf.str (), true, parse_status); + eval_string (buf, true, parse_status); if (parse_status == 0) { diff -r 45f8197ffd51 -r e7e928088e90 src/ov-fcn-inline.cc --- a/src/ov-fcn-inline.cc Mon Mar 09 17:13:58 2009 -0400 +++ b/src/ov-fcn-inline.cc Tue Mar 10 01:01:50 2009 -0400 @@ -48,6 +48,7 @@ #include "toplev.h" #include "byte-swap.h" +#include "ls-ascii-helper.h" #include "ls-oct-ascii.h" #include "ls-hdf5.h" #include "ls-utils.h" @@ -162,29 +163,19 @@ if (nm == "0") nm = ""; - char c; - std::ostringstream buf; + skip_preceeding_newline (is); - // Skip preceeding newline(s) - while (is.get (c) && c == '\n') - /* do nothing */; + std::string buf; if (is) { - buf << c; - // Get a line of text whitespace characters included, leaving - // newline in the stream - while (is.peek () != '\n') - { - is.get (c); - if (! is) - break; - buf << c; - } + // Get a line of text whitespace characters included, + // leaving newline in the stream. 
+ buf = read_until_newline (is, true); } - iftext = buf.str (); + iftext = buf; octave_fcn_inline tmp (iftext, ifargs, nm); fcn = tmp.fcn; diff -r 45f8197ffd51 -r e7e928088e90 src/ov-range.cc --- a/src/ov-range.cc Mon Mar 09 17:13:58 2009 -0400 +++ b/src/ov-range.cc Tue Mar 10 01:01:50 2009 -0400 @@ -39,6 +39,7 @@ #include "pr-output.h" #include "byte-swap.h" +#include "ls-ascii-helper.h" #include "ls-hdf5.h" #include "ls-utils.h" @@ -326,14 +327,7 @@ break; } - for (;;) - { - if (is && (c == '%' || c == '#')) - while (is.get (c) && c != '\n') - ; // Skip to beginning of next line, ignoring everything. - else - break; - } + skip_until_newline (is, false); } bool diff -r 45f8197ffd51 -r e7e928088e90 src/ov-str-mat.cc --- a/src/ov-str-mat.cc Mon Mar 09 17:13:58 2009 -0400 +++ b/src/ov-str-mat.cc Tue Mar 10 01:01:50 2009 -0400 @@ -36,11 +36,12 @@ #include "mx-base.h" #include "oct-locbuf.h" +#include "byte-swap.h" #include "defun.h" -#include "byte-swap.h" #include "gripes.h" +#include "ls-ascii-helper.h" +#include "ls-hdf5.h" #include "ls-oct-ascii.h" -#include "ls-hdf5.h" #include "ls-utils.h" #include "oct-obj.h" #include "oct-stream.h" @@ -316,9 +317,7 @@ { char *ftmp = tmp.fortran_vec (); - // Skip the return line - if (! is.read (ftmp, 1)) - return false; + skip_preceeding_newline (is); if (! is.read (ftmp, dv.numel ()) || !is) {