changeset 31984:ff1fed5bf218

maint: Merge stable to default.
author Markus Mützel <markus.muetzel@gmx.de>
date Sat, 08 Apr 2023 09:48:50 +0200
parents 61bf59bcde72 (current diff) ee187a104701 (diff)
children baf8322763c0
files configure.ac m4/acinclude.m4 test/io.tst
diffstat 5 files changed, 79 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- a/configure.ac	Fri Apr 07 13:37:16 2023 -0700
+++ b/configure.ac	Sat Apr 08 09:48:50 2023 +0200
@@ -531,6 +531,10 @@
 AC_DEFINE_UNQUOTED(OCTAVE_IDX_TYPE, [$OCTAVE_IDX_TYPE],
   [Define to the type of octave_idx_type (64 or 32 bit signed integer).])
 
+### Check for LLVM or Apple libc++ library.
+
+OCTAVE_LLVM_LIBCXX
+
 ### Check for pthread library.
 
 AX_PTHREAD
--- a/libinterp/corefcn/file-io.cc	Fri Apr 07 13:37:16 2023 -0700
+++ b/libinterp/corefcn/file-io.cc	Sat Apr 08 09:48:50 2023 +0200
@@ -393,9 +393,6 @@
 
   fid = -1;
 
-  // Valid names for encodings consist of ASCII characters only.
-  std::transform (encoding.begin (), encoding.end (), encoding.begin (),
-                  ::tolower);
   if (encoding.compare ("utf-8"))
     {
       // check if encoding is valid
@@ -475,6 +472,22 @@
   std::string arch = tc_arch.xstring_value ("%s: architecture type must be a string", fcn);
   std::string encoding = tc_encoding.xstring_value ("%s: ENCODING must be a string", fcn);
 
+  // Valid names for encodings consist of ASCII characters only.
+  std::transform (encoding.begin (), encoding.end (), encoding.begin (),
+                  ::tolower);
+
+  if (encoding == "system")
+    encoding = octave_locale_charset_wrapper ();
+
+#if defined (OCTAVE_HAVE_STRICT_ENCODING_FACET)
+  if (encoding != "utf-8")
+    {
+      warning_with_id ("Octave:fopen:encoding-unsupported",
+                       "fopen: encoding must be 'UTF-8' for this version");
+      encoding = "utf-8";
+    }
+#endif
+
   retval = do_stream_open (name, mode, arch, encoding, fid);
 
   return retval;
@@ -623,8 +636,6 @@
   octave_value arch = (nargin > 2) ? args(2) : octave_value ("native");
 
   octave_value encoding = (nargin > 3) ? args(3) : octave_value ("utf-8");
-  if (encoding.string_value () == "system")
-    encoding = octave_value (octave_locale_charset_wrapper ());
 
   int fid = -1;
 
@@ -656,6 +667,21 @@
 %! assert (name, "");
 %! assert (mode, "");
 %! assert (arch, "");
+
+## FIXME: should be conditional on OCTAVE_HAVE_STRICT_ENCODING_FACET
+%!testif HAVE_LLVM_LIBCXX
+%! fname = tempname ();
+%! unwind_protect
+%!   fail ("fid = fopen (fname, 'wb', 'n', 'Windows-1252')", ...
+%!         "warning", "encoding must be 'UTF-8'");
+%!   [name, mode, arch, encoding] = fopen (fid);
+%!   assert (name, fname);
+%!   assert (mode, "wb");
+%!   assert (encoding, "utf-8");  # fallback after warning
+%! unwind_protect_cleanup
+%!   fclose (fid);
+%!   unlink (fname);
+%! end_unwind_protect
 */
 
 DEFMETHOD (freport, interp, args, ,
@@ -907,7 +933,8 @@
 
 /*
 ## Check if text is correctly converted to output encoding
-%!test <*61839>
+## FIXME: should be conditional on OCTAVE_HAVE_STRICT_ENCODING_FACET
+%!testif ; ! __have_feature__ ("LLVM_LIBCXX")  <*61839>
 %! str = "aäöu";  # string with non-ASCII characters
 %! fname = tempname ();
 %! fid = fopen (fname, "wt", "n", "ISO-8859-1");
@@ -2301,7 +2328,8 @@
 %! assert (obs, { [0; 1; NaN; 2; 3] });
 
 ## file stream with encoding
-%!test
+## FIXME: should be conditional on OCTAVE_HAVE_STRICT_ENCODING_FACET
+%!testif ; ! __have_feature__ ("LLVM_LIBCXX")
 %! f = tempname ();
 %! fid = fopen (f, "wt+", "n", "iso-8859-1");
 %! unwind_protect
--- a/m4/acinclude.m4	Fri Apr 07 13:37:16 2023 -0700
+++ b/m4/acinclude.m4	Sat Apr 08 09:48:50 2023 +0200
@@ -167,6 +167,33 @@
   fi
 ])
 dnl
+dnl Check for LLVM or Apple libc++ library.
+dnl
+AC_DEFUN([OCTAVE_LLVM_LIBCXX], [
+  dnl _LIBCPP_VERSION is a preprocessor macro, so a compile-only test is
+  dnl sufficient.  Unlike a run test, a compile test also gives the right
+  dnl answer when cross-compiling.
+  AC_CACHE_CHECK([whether using STL from LLVM or Apple],
+    [octave_cv_llvm_libcxx],
+    [AC_LANG_PUSH(C++)
+    AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
+        // Include any header from the STL
+        #include <iostream>
+        ]], [[
+        #if ! defined (_LIBCPP_VERSION)
+        #  error "not using LLVM or Apple libc++"
+        #endif
+      ]])],
+      octave_cv_llvm_libcxx=yes,
+      octave_cv_llvm_libcxx=no)
+    AC_LANG_POP(C++)
+  ])
+  if test $octave_cv_llvm_libcxx = yes; then
+    AC_DEFINE(HAVE_LLVM_LIBCXX, 1,
+      [Define to 1 if linking to LLVM or Apple libc++.])
+  fi
+])
+dnl
 dnl Check whether std::pmr::polymorphic_allocator is available.
 dnl
 AC_DEFUN([OCTAVE_CHECK_STD_PMR_POLYMORPHIC_ALLOCATOR], [
--- a/oct-conf-post-private.in.h	Fri Apr 07 13:37:16 2023 -0700
+++ b/oct-conf-post-private.in.h	Sat Apr 08 09:48:50 2023 +0200
@@ -91,6 +91,14 @@
 #  endif
 #endif
 
+#if defined (HAVE_LLVM_LIBCXX)
+/* The stream encoding facet from libc++ is stricter than libstdc++ when
+   it comes to reverting the stream.  Disable encoding conversion for file
+   streams with libc++.
+   FIXME: Maybe use a more specific test.  */
+#  define OCTAVE_HAVE_STRICT_ENCODING_FACET 1
+#endif
+
 /* Make all .oct file interpreter functions and methods static.  */
 #define OCTAVE_USE_STATIC_DEFUN
 
--- a/test/io.tst	Fri Apr 07 13:37:16 2023 -0700
+++ b/test/io.tst	Sat Apr 08 09:48:50 2023 +0200
@@ -938,7 +938,9 @@
 %!   unlink (nm);
 %! endif
 
-%!test   # write to and read from file with encoding
+# write to and read from file with encoding
+# FIXME: should be conditional on OCTAVE_HAVE_STRICT_ENCODING_FACET
+%!testif ; ! __have_feature__ ("LLVM_LIBCXX")
 %! temp_file = [tempname(), ".txt"];
 %! fid = fopen (temp_file, "wt", "n", "iso-8859-1");
 %! unwind_protect
@@ -1235,7 +1237,8 @@
 %!  endfor
 
 # stream with transcoding
-%!test <*63930>
+# FIXME: should be conditional on OCTAVE_HAVE_STRICT_ENCODING_FACET
+%!testif ; ! __have_feature__ ("LLVM_LIBCXX") <*63930>
 %! w_modes = {"wb", "wt"};
 %! # 64 non-ASCII characters that can be represented in 'windows-1252'
 %! f_texts{1} = repmat ('ÀÂÈÊÌàäéèêìîöòùû', 1, 4);