changeset 31983:ee187a104701 stable

fopen: Do not convert encoding for file streams with libc++ (bug #63930).

* oct-conf-post-private.in.h: Define OCTAVE_HAVE_STRICT_ENCODING_FACET if Octave was linked against STL from LLVM or Apple.

* libinterp/corefcn/file-io.cc (Ffopen): Do not convert encoding if OCTAVE_HAVE_STRICT_ENCODING_FACET is defined.

* test/io.tst: Skip test for encoding conversion if Octave was linked against STL from LLVM or Apple.
author Markus Mützel <markus.muetzel@gmx.de>
date Fri, 07 Apr 2023 17:06:50 +0200
parents 86eb373a6c64
children ff1fed5bf218 2f21784af0b9
files libinterp/corefcn/file-io.cc oct-conf-post-private.in.h test/io.tst
diffstat 3 files changed, 48 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- a/libinterp/corefcn/file-io.cc	Thu Apr 06 09:29:24 2023 +0200
+++ b/libinterp/corefcn/file-io.cc	Fri Apr 07 17:06:50 2023 +0200
@@ -393,9 +393,6 @@
 
   fid = -1;
 
-  // Valid names for encodings consist of ASCII characters only.
-  std::transform (encoding.begin (), encoding.end (), encoding.begin (),
-                  ::tolower);
   if (encoding.compare ("utf-8"))
     {
       // check if encoding is valid
@@ -475,6 +472,22 @@
   std::string arch = tc_arch.xstring_value ("%s: architecture type must be a string", fcn);
   std::string encoding = tc_encoding.xstring_value ("%s: ENCODING must be a string", fcn);
 
+  // Valid names for encodings consist of ASCII characters only.
+  std::transform (encoding.begin (), encoding.end (), encoding.begin (),
+                  ::tolower);
+
+  if (encoding == "system")
+    encoding = octave_locale_charset_wrapper ();
+
+#if defined (OCTAVE_HAVE_STRICT_ENCODING_FACET)
+  if (encoding != "utf-8")
+    {
+      warning_with_id ("Octave:fopen:encoding-unsupported",
+                       "fopen: encoding must be 'UTF-8' for this version");
+      encoding = "utf-8";
+    }
+#endif
+
   retval = do_stream_open (name, mode, arch, encoding, fid);
 
   return retval;
@@ -623,8 +636,6 @@
   octave_value arch = (nargin > 2) ? args(2) : octave_value ("native");
 
   octave_value encoding = (nargin > 3) ? args(3) : octave_value ("utf-8");
-  if (encoding.string_value () == "system")
-    encoding = octave_value (octave_locale_charset_wrapper ());
 
   int fid = -1;
 
@@ -656,6 +667,21 @@
 %! assert (name, "");
 %! assert (mode, "");
 %! assert (arch, "");
+
+## FIXME: should be conditional on OCTAVE_HAVE_STRICT_ENCODING_FACET
+%!testif HAVE_LLVM_LIBCXX
+%! fname = tempname ();
+%! unwind_protect
+%!   fail ("fid = fopen (fname, 'wb', 'n', 'Windows-1252')", ...
+%!         "warning", "encoding must be 'UTF-8'");
+%!   [name, mode, arch, encoding] = fopen (fid);
+%!   assert (name, fname);
+%!   assert (mode, "wb");
+%!   assert (encoding, "utf-8");  # fallback after warning
+%! unwind_protect_cleanup
+%!   fclose (fid);
+%!   unlink (fname);
+%! end_unwind_protect
 */
 
 DEFMETHOD (freport, interp, args, ,
@@ -907,7 +933,8 @@
 
 /*
 ## Check if text is correctly converted to output encoding
-%!test <*61839>
+## FIXME: should be conditional on OCTAVE_HAVE_STRICT_ENCODING_FACET
+%!testif ; ! __have_feature__ ("LLVM_LIBCXX")  <*61839>
 %! str = "aäöu";  # string with non-ASCII characters
 %! fname = tempname ();
 %! fid = fopen (fname, "wt", "n", "ISO-8859-1");
@@ -2301,7 +2328,8 @@
 %! assert (obs, { [0; 1; NaN; 2; 3] });
 
 ## file stream with encoding
-%!test
+## FIXME: should be conditional on OCTAVE_HAVE_STRICT_ENCODING_FACET
+%!testif ; ! __have_feature__ ("LLVM_LIBCXX")
 %! f = tempname ();
 %! fid = fopen (f, "wt+", "n", "iso-8859-1");
 %! unwind_protect
--- a/oct-conf-post-private.in.h	Thu Apr 06 09:29:24 2023 +0200
+++ b/oct-conf-post-private.in.h	Fri Apr 07 17:06:50 2023 +0200
@@ -91,6 +91,14 @@
 #  endif
 #endif
 
+#if defined (HAVE_LLVM_LIBCXX)
+/* The stream encoding facet from libc++ is stricter than the one from
+   libstdc++ when it comes to reverting the stream.  Disable encoding
+   conversion for file streams with libc++.
+   FIXME: Maybe use a more specific test.  */
+#  define OCTAVE_HAVE_STRICT_ENCODING_FACET 1
+#endif
+
 /* Make all .oct file interpreter functions and methods static.  */
 #define OCTAVE_USE_STATIC_DEFUN
 
--- a/test/io.tst	Thu Apr 06 09:29:24 2023 +0200
+++ b/test/io.tst	Fri Apr 07 17:06:50 2023 +0200
@@ -665,7 +665,9 @@
 %!   unlink (nm);
 %! endif
 
-%!test   # write to and read from file with encoding
+# write to and read from file with encoding
+# FIXME: should be conditional on OCTAVE_HAVE_STRICT_ENCODING_FACET
+%!testif ; ! __have_feature__ ("LLVM_LIBCXX")
 %! temp_file = [tempname(), ".txt"];
 %! fid = fopen (temp_file, "wt", "n", "iso-8859-1");
 %! unwind_protect
@@ -962,7 +964,8 @@
 %!  endfor
 
 # stream with transcoding
-%!test <*63930>
+# FIXME: should be conditional on OCTAVE_HAVE_STRICT_ENCODING_FACET
+%!testif ; ! __have_feature__ ("LLVM_LIBCXX") <*63930>
 %! w_modes = {"wb", "wt"};
 %! # 64 non-ASCII characters that can be represented in 'windows-1252'
 %! f_texts{1} = repmat ('ÀÂÈÊÌàäéèêìîöòùû', 1, 4);