changeset 8946:e7e928088e90

fix CRLF issues with text-mode reading in windows when loading ascii data
author Benjamin Lindner <lindnerb@users.sourceforge.net>
date Tue, 10 Mar 2009 01:01:50 -0400
parents 45f8197ffd51
children 1e4b3149365a
files src/ChangeLog src/Makefile.in src/load-save.cc src/ls-ascii-helper.cc src/ls-ascii-helper.h src/ls-mat-ascii.cc src/ls-oct-ascii.cc src/ls-oct-ascii.h src/ov-fcn-handle.cc src/ov-fcn-inline.cc src/ov-range.cc src/ov-str-mat.cc
diffstat 12 files changed, 281 insertions(+), 77 deletions(-) [+]
line wrap: on
line diff
--- a/src/ChangeLog	Mon Mar 09 17:13:58 2009 -0400
+++ b/src/ChangeLog	Tue Mar 10 01:01:50 2009 -0400
@@ -1,3 +1,16 @@
+2009-03-09  Benjamin Lindner  <lindnerb@users.sourceforge.net>
+
+	* ls-ascii-helper.h, ls-ascii-helper.cc: New files.
+	* Makefile.in: Add them to the appropriate lists.
+	* load-save.cc (Fload): Open all files in binary mode.
+	* ov-range.cc (load_ascii): Explicitly handle CR and CRLF line endings.
+	* ov-fcn-handle.cc (load_ascii): Likewise.
+	* ov-fcn-inline.cc (load_ascii): Likewise.
+	* ov-str-mat.cc (load_ascii): Likewise.
+	* ls-mat-ascii.cc (get_mat_data_input_line): Likewise.
+	* ls-oct-ascii.cc (extract_keyword, read_ascii_data): Likewise.
+	* ls-oct-ascii.h (extract_keyword): Likewise.
+	
 2009-03-09  John W. Eaton  <jwe@octave.org>
 
 	* graphics.h.in (OCTAVE_DEFAULT_FONTNAME): New macro, defaults to "*".
--- a/src/Makefile.in	Mon Mar 09 17:13:58 2009 -0400
+++ b/src/Makefile.in	Tue Mar 10 01:01:50 2009 -0400
@@ -123,7 +123,7 @@
 	comment-list.h debug.h defun-dld.h defun-int.h defun.h \
 	dirfns.h display.h dynamic-ld.h error.h file-io.h gl-render.h \
 	gripes.h help.h input.h lex.h load-path.h load-save.h ls-hdf5.h \
-	ls-mat-ascii.h ls-mat4.h ls-mat5.h ls-oct-ascii.h \
+	ls-mat-ascii.h ls-mat4.h ls-mat5.h ls-oct-ascii.h ls-ascii-helper.h \
 	ls-oct-binary.h ls-utils.h mex.h mexproto.h oct-errno.h \
 	oct-fstrm.h oct-hdf5.h oct-hist.h oct-iostrm.h oct-map.h oct-obj.h \
 	oct-prcstrm.h oct-procbuf.h oct-stdstrm.h oct-stream.h \
@@ -217,9 +217,10 @@
 
 DIST_SRC := Cell.cc bitfcns.cc c-file-ptr-stream.cc comment-list.cc \
 	cutils.c data.cc debug.cc defaults.cc defun.cc dirfns.cc \
-	display.cc dynamic-ld.cc error.cc file-io.cc gl-render.cc graphics.cc \
-	gripes.cc help.cc input.cc lex.l load-path.cc load-save.cc \
-	ls-hdf5.cc ls-mat-ascii.cc ls-mat4.cc ls-mat5.cc ls-oct-ascii.cc \
+	display.cc dynamic-ld.cc error.cc file-io.cc gl-render.cc \
+	graphics.cc gripes.cc help.cc input.cc lex.l load-path.cc \
+	load-save.cc ls-hdf5.cc ls-mat-ascii.cc ls-mat4.cc \
+	ls-mat5.cc ls-oct-ascii.cc ls-ascii-helper.cc \
 	ls-oct-binary.cc ls-utils.cc main.c mappers.cc matherr.c \
 	mex.cc oct-fstrm.cc oct-hist.cc oct-iostrm.cc oct-map.cc \
 	oct-obj.cc oct-prcstrm.cc oct-procbuf.cc oct-stream.cc \
--- a/src/load-save.cc	Mon Mar 09 17:13:58 2009 -0400
+++ b/src/load-save.cc	Tue Mar 10 01:01:50 2009 -0400
@@ -791,16 +791,9 @@
 	{
 	  i++;
 
-	  std::ios::openmode mode = std::ios::in;
-
-	  if (format == LS_BINARY
-#ifdef HAVE_HDF5
-	      || format == LS_HDF5
-#endif
-	      || format == LS_MAT_BINARY
-	      || format == LS_MAT5_BINARY
-	      || format == LS_MAT7_BINARY)
-	    mode |= std::ios::binary;
+	  // Always open in binary mode and handle various
+	  // line-endings explicitly.
+	  std::ios::openmode mode = std::ios::in | std::ios::binary;
 
 #ifdef HAVE_ZLIB
 	  if (use_zlib)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/ls-ascii-helper.cc	Tue Mar 10 01:01:50 2009 -0400
@@ -0,0 +1,181 @@
+/*
+
+Copyright (C) 2009 Benjamin Lindner
+
+This file is part of Octave.
+
+Octave is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+Octave is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Octave; see the file COPYING.  If not, see
+<http://www.gnu.org/licenses/>.
+
+*/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "ls-ascii-helper.h"
+
+#include <iostream>
+#include <sstream>
+
+// Helper functions when reading from ascii files.
+
+// These functions take care of CR/LF issues when files are opened in
+// text-mode for reading.
+
+// Skip characters from stream IS until a newline (LF, CR or CRLF) is
+// reached.  If KEEP_NEWLINE is false, the newline is eaten from the
+// stream as well; otherwise it is left unread.
+
+void
+skip_until_newline (std::istream& is, bool keep_newline)
+{
+  if (! is)
+    return;
+  
+  while (is)
+    {
+      char c = is.peek ();
+
+      if (c == '\n' || c == '\r')
+	{
+	  // Reached newline.
+	  if (! keep_newline)
+	    {
+	      // Eat the CR or LF character.
+	      char d;
+	      is.get (d);
+	      
+	      // Make sure that for binary-mode opened ascii files
+	      // containing CRLF line endings we skip the LF after CR.
+	      if (c == '\r' && is.peek () == '\n')
+		{
+		  // Yes, LF following CR, eat it.
+		  is.get (d);
+		}
+	    }
+	  
+	  // Newline was found, and read from stream if
+	  // keep_newline == false, so exit loop.
+	  break;
+	}
+      else
+	{
+	  // No newline character peeked, so read it and proceed to next
+	  // character.
+	  char d;
+	  is.get (d);
+	}
+    }
+}
+
+
+// If the next character in stream IS is a newline (CR or LF, e.g. a
+// leftover from a previous read), eat consecutive newline characters
+// until the next character is not a newline.
+
+void
+skip_preceeding_newline (std::istream& is)
+{
+  if (! is)
+    return;
+  
+  // Check whether IS currently points to a newline character.
+  char c = is.peek ();
+
+  if (c == '\n' || c == '\r')
+    {
+      // Yes, at newline.
+      do
+	{
+	  // Eat the CR or LF character.
+	  char d;
+	  is.get (d);
+	  
+	  // Make sure that for binary-mode opened ascii files
+	  // containing CRLF line endings we skip the LF after CR.
+	  if (c == '\r' && is.peek () == '\n')
+	    {
+	      // Yes, LF following CR, eat it.
+	      is.get (d);
+	  }
+	  
+	  // Peek at the next character.
+	  c = is.peek ();
+
+	  // Loop while there is still a newline ahead.
+	}
+      while (c == '\n' || c == '\r');
+    }
+}
+
+// Read characters from stream IS until a newline (LF, CR or CRLF) is
+// reached.  If KEEP_NEWLINE is false, the newline is eaten from the
+// stream as well; otherwise it is left unread.  The characters read
+// before the newline are returned as a std::string.
+
+std::string
+read_until_newline (std::istream& is, bool keep_newline)
+{
+  if (! is)
+    return std::string ();
+  
+  std::ostringstream buf;
+  
+  while (is)
+    {
+      char c = is.peek ();
+
+      if (c == '\n' || c == '\r')
+	{
+	  // Reached newline.
+	  if (! keep_newline)
+	    {
+	      // Eat the CR or LF character.
+	      char d;
+	      is.get (d);
+	      
+	      // Make sure that for binary-mode opened ascii files
+	      // containing CRLF line endings we skip the LF after
+	      // CR.
+
+	      if (c == '\r' && is.peek () == '\n')
+		{
+		  // Yes, LF following CR, eat it.
+		  is.get (d);
+		}
+	    }
+	  
+	  // Newline was found, and read from stream if
+	  // keep_newline == false, so exit loop.
+	  break;
+	}
+      else
+	{
+	  // No newline character peeked, so read it and store it; at
+	  // end of input the read fails and D must not be appended.
+	  char d;
+	  if (is.get (d))
+	    buf << d;
+	}
+    }
+  
+  return buf.str ();
+}
+
+/*
+;;; Local Variables: ***
+;;; mode: C++ ***
+;;; End: ***
+*/
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/ls-ascii-helper.h	Tue Mar 10 01:01:50 2009 -0400
@@ -0,0 +1,44 @@
+/*
+
+Copyright (C) 2009 Benjamin Lindner
+
+This file is part of Octave.
+
+Octave is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+Octave is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Octave; see the file COPYING.  If not, see
+<http://www.gnu.org/licenses/>.
+
+*/
+
+#if !defined (octave_ls_ascii_helper_h)
+#define octave_ls_ascii_helper_h 1
+
+#include <iostream>
+#include <string>
+
+extern OCTINTERP_API void
+skip_until_newline (std::istream& is, bool keep_newline = false);
+
+extern OCTINTERP_API void
+skip_preceeding_newline (std::istream& is);
+
+extern OCTINTERP_API std::string
+read_until_newline (std::istream& is, bool keep_newline = false);
+
+#endif
+
+/*
+;;; Local Variables: ***
+;;; mode: C++ ***
+;;; End: ***
+*/
--- a/src/ls-mat-ascii.cc	Mon Mar 09 17:13:58 2009 -0400
+++ b/src/ls-mat-ascii.cc	Tue Mar 10 01:01:50 2009 -0400
@@ -51,6 +51,8 @@
 #include "gripes.h"
 #include "lex.h"
 #include "load-save.h"
+#include "ls-ascii-helper.h"
+#include "ls-mat-ascii.h"
 #include "oct-obj.h"
 #include "oct-map.h"
 #include "ov-cell.h"
@@ -63,8 +65,6 @@
 #include "version.h"
 #include "dMatrix.h"
 
-#include "ls-mat-ascii.h"
-
 static std::string
 get_mat_data_input_line (std::istream& is)
 {
@@ -80,15 +80,14 @@
       while (is.get (c))
 	{
 	  if (c == '\n' || c == '\r')
-	    break;
+	    {
+	      skip_until_newline (is, false);
+	      break;
+	    }
 
 	  if (c == '%' || c == '#')
 	    {
-	      // skip to end of line
-	      while (is.get (c))
-		if (c == '\n' || c == '\r')
-		  break;
-
+	      skip_until_newline (is, false);
 	      break;
 	    }
 
--- a/src/ls-oct-ascii.cc	Mon Mar 09 17:13:58 2009 -0400
+++ b/src/ls-oct-ascii.cc	Tue Mar 10 01:01:50 2009 -0400
@@ -51,6 +51,8 @@
 #include "error.h"
 #include "gripes.h"
 #include "load-save.h"
+#include "ls-ascii-helper.h"
+#include "ls-oct-ascii.h"
 #include "oct-obj.h"
 #include "oct-map.h"
 #include "ov-cell.h"
@@ -62,8 +64,6 @@
 #include "version.h"
 #include "dMatrix.h"
 
-#include "ls-oct-ascii.h"
-
 // The number of decimal digits to use when writing ascii data.
 static int Vsave_precision = 16;
 
@@ -123,10 +123,7 @@
 	  else if (next_only)
 	    break;
 	  else
-	    {
-	      while (is.get (c) && c != '\n' && c != '\r')
-		; // Skip to end of line.
-	    }
+	    skip_until_newline (is, false);
 	}
     }
 
--- a/src/ls-oct-ascii.h	Mon Mar 09 17:13:58 2009 -0400
+++ b/src/ls-oct-ascii.h	Tue Mar 10 01:01:50 2009 -0400
@@ -30,6 +30,8 @@
 
 #include "str-vec.h"
 
+#include "ls-ascii-helper.h"
+
 // Flag for cell elements
 #define CELL_ELT_TAG "<cell-element>"
 
@@ -103,8 +105,7 @@
 		is >> value;
 	      if (is)
 		status = true;
-	      while (is.get (c) && c != '\n' && c != '\r')
-		; // Skip to beginning of next line;
+	      skip_until_newline (is, false);
 	      break;
 	    }
 	  else if (next_only)
@@ -165,8 +166,7 @@
 		    is >> value;
 		  if (is)
 		    status = true;
-		  while (is.get (c) && c != '\n' && c != '\r')
-		    ; // Skip to beginning of next line;
+		  skip_until_newline (is, false);
 		  return status;
 		}
 	    }
--- a/src/ov-fcn-handle.cc	Mon Mar 09 17:13:58 2009 -0400
+++ b/src/ov-fcn-handle.cc	Tue Mar 10 01:01:50 2009 -0400
@@ -55,9 +55,10 @@
 #include "oct-env.h"
 
 #include "byte-swap.h"
+#include "ls-ascii-helper.h"
+#include "ls-hdf5.h"
 #include "ls-oct-ascii.h"
 #include "ls-oct-binary.h"
-#include "ls-hdf5.h"
 #include "ls-utils.h"
 
 DEFINE_OCTAVE_ALLOCATOR (octave_fcn_handle);
@@ -297,34 +298,25 @@
 
   if (nm == "@<anonymous>")
     {
-      octave_idx_type len = 0;
-      char c;
-      std::ostringstream buf;
+      skip_preceeding_newline (is);
 
-      // Skip preceeding newline(s).
-      while (is.get (c) && c == '\n')
-	/* do nothing */;
+      std::string buf;
 
       if (is)
 	{
-	  buf << c;
 
 	  // Get a line of text whitespace characters included, leaving
 	  // newline in the stream.
+	  buf = read_until_newline (is, true);
 
-	  while (is.peek () != '\n')
-	    {
-	      is.get (c);
-	      if (! is)
-		break;
-	      buf << c;
-	    }
 	}
 
       pos = is.tellg ();
 
       symbol_table::scope_id local_scope = symbol_table::alloc_scope ();
 
+      octave_idx_type len = 0;
+
       if (extract_keyword (is, "length", len, true) && len >= 0)
 	{
 	  if (len > 0)
@@ -363,7 +355,7 @@
 
 	  int parse_status;
 	  octave_value anon_fcn_handle = 
-	    eval_string (buf.str (), true, parse_status);
+	    eval_string (buf, true, parse_status);
 
 	  if (parse_status == 0)
 	    {
--- a/src/ov-fcn-inline.cc	Mon Mar 09 17:13:58 2009 -0400
+++ b/src/ov-fcn-inline.cc	Tue Mar 10 01:01:50 2009 -0400
@@ -48,6 +48,7 @@
 #include "toplev.h"
 
 #include "byte-swap.h"
+#include "ls-ascii-helper.h"
 #include "ls-oct-ascii.h"
 #include "ls-hdf5.h"
 #include "ls-utils.h"
@@ -162,29 +163,19 @@
       if (nm == "0")
 	nm = "";
 
-      char c;
-      std::ostringstream buf;
+      skip_preceeding_newline (is);
 
-      // Skip preceeding newline(s)
-      while (is.get (c) && c == '\n')
-	/* do nothing */;
+      std::string buf;
 
       if (is)
 	{
-	  buf << c;
 
-	  // Get a line of text whitespace characters included, leaving
-	  // newline in the stream
-	  while (is.peek () != '\n')
-	    {
-	      is.get (c);
-	      if (! is)
-		break;
-	      buf << c;
-	    }
+	  // Get a line of text whitespace characters included,
+	  // leaving newline in the stream.
+	  buf = read_until_newline (is, true);
 	}
 
-      iftext = buf.str ();
+      iftext = buf;
 
       octave_fcn_inline tmp (iftext, ifargs, nm);
       fcn = tmp.fcn;
--- a/src/ov-range.cc	Mon Mar 09 17:13:58 2009 -0400
+++ b/src/ov-range.cc	Tue Mar 10 01:01:50 2009 -0400
@@ -39,6 +39,7 @@
 #include "pr-output.h"
 
 #include "byte-swap.h"
+#include "ls-ascii-helper.h"
 #include "ls-hdf5.h"
 #include "ls-utils.h"
 
@@ -326,14 +327,7 @@
 	break;
     }
 
-  for (;;)
-    {
-      if (is && (c == '%' || c == '#'))
-	while (is.get (c) && c != '\n')
-	  ; // Skip to beginning of next line, ignoring everything.
-      else
-	break;
-    }
+  skip_until_newline (is, false);
 }
 
 bool 
--- a/src/ov-str-mat.cc	Mon Mar 09 17:13:58 2009 -0400
+++ b/src/ov-str-mat.cc	Tue Mar 10 01:01:50 2009 -0400
@@ -36,11 +36,12 @@
 #include "mx-base.h"
 #include "oct-locbuf.h"
 
+#include "byte-swap.h"
 #include "defun.h"
-#include "byte-swap.h"
 #include "gripes.h"
+#include "ls-ascii-helper.h"
+#include "ls-hdf5.h"
 #include "ls-oct-ascii.h"
-#include "ls-hdf5.h"
 #include "ls-utils.h"
 #include "oct-obj.h"
 #include "oct-stream.h"
@@ -316,9 +317,7 @@
 		    {
 		      char *ftmp = tmp.fortran_vec ();
 
-		      // Skip the return line
-		      if (! is.read (ftmp, 1))
-			return false;
+		      skip_preceeding_newline (is);
 
 		      if (! is.read (ftmp, dv.numel ()) || !is)
 			{