Mercurial > octave
changeset 31983:ee187a104701 stable
fopen: Do not convert encoding for file streams with libc++ (bug #63930).
* oct-conf-post-private.in.h: Define OCTAVE_HAVE_STRICT_ENCODING_FACET if
Octave was linked against STL from LLVM or Apple.
* libinterp/corefcn/file-io.cc (Ffopen): Do not convert encoding if
OCTAVE_HAVE_STRICT_ENCODING_FACET is defined.
* test/io.tst: Skip test for encoding conversion if Octave was linked against
STL from LLVM or Apple.
field | value |
---|---|
author | Markus Mützel <markus.muetzel@gmx.de> |
date | Fri, 07 Apr 2023 17:06:50 +0200 |
parents | 86eb373a6c64 |
children | ff1fed5bf218 2f21784af0b9 |
files | libinterp/corefcn/file-io.cc oct-conf-post-private.in.h test/io.tst |
diffstat | 3 files changed, 48 insertions(+), 9 deletions(-) [+] |
line wrap: on
line diff
--- a/libinterp/corefcn/file-io.cc Thu Apr 06 09:29:24 2023 +0200 +++ b/libinterp/corefcn/file-io.cc Fri Apr 07 17:06:50 2023 +0200 @@ -393,9 +393,6 @@ fid = -1; - // Valid names for encodings consist of ASCII characters only. - std::transform (encoding.begin (), encoding.end (), encoding.begin (), - ::tolower); if (encoding.compare ("utf-8")) { // check if encoding is valid @@ -475,6 +472,22 @@ std::string arch = tc_arch.xstring_value ("%s: architecture type must be a string", fcn); std::string encoding = tc_encoding.xstring_value ("%s: ENCODING must be a string", fcn); + // Valid names for encodings consist of ASCII characters only. + std::transform (encoding.begin (), encoding.end (), encoding.begin (), + ::tolower); + + if (encoding == "system") + encoding = octave_locale_charset_wrapper (); + +#if defined (OCTAVE_HAVE_STRICT_ENCODING_FACET) + if (encoding != "utf-8") + { + warning_with_id ("Octave:fopen:encoding-unsupported", + "fopen: encoding must be 'UTF-8' for this version"); + encoding = "utf-8"; + } +#endif + retval = do_stream_open (name, mode, arch, encoding, fid); return retval; @@ -623,8 +636,6 @@ octave_value arch = (nargin > 2) ? args(2) : octave_value ("native"); octave_value encoding = (nargin > 3) ? args(3) : octave_value ("utf-8"); - if (encoding.string_value () == "system") - encoding = octave_value (octave_locale_charset_wrapper ()); int fid = -1; @@ -656,6 +667,21 @@ %! assert (name, ""); %! assert (mode, ""); %! assert (arch, ""); + +## FIXME: should be conditional on OCTAVE_HAVE_STRICT_ENCODING_FACET +%!testif HAVE_LLVM_LIBCXX +%! fname = tempname (); +%! unwind_protect +%! fail ("fid = fopen (fname, 'wb', 'n', 'Windows-1252')", ... +%! "warning", "encoding must be 'UTF-8'"); +%! [name, mode, arch, encoding] = fopen (fid); +%! assert (name, fname); +%! assert (mode, "wb"); +%! assert (encoding, "utf-8"); # fallback after warning +%! unwind_protect_cleanup +%! fclose (fid); +%! unlink (fname); +%! 
end_unwind_protect */ DEFMETHOD (freport, interp, args, , @@ -907,7 +933,8 @@ /* ## Check if text is correctly converted to output encoding -%!test <*61839> +# FIXME: should be conditional on OCTAVE_HAVE_STRICT_ENCODING_FACET +%!testif ; ! __have_feature__ ("LLVM_LIBCXX") <*61839> %! str = "aäöu"; # string with non-ASCII characters %! fname = tempname (); %! fid = fopen (fname, "wt", "n", "ISO-8859-1"); @@ -2301,7 +2328,8 @@ %! assert (obs, { [0; 1; NaN; 2; 3] }); ## file stream with encoding -%!test +## FIXME: should be conditional on OCTAVE_HAVE_STRICT_ENCODING_FACET +%!testif ; ! __have_feature__ ("LLVM_LIBCXX") %! f = tempname (); %! fid = fopen (f, "wt+", "n", "iso-8859-1"); %! unwind_protect
--- a/oct-conf-post-private.in.h Thu Apr 06 09:29:24 2023 +0200 +++ b/oct-conf-post-private.in.h Fri Apr 07 17:06:50 2023 +0200 @@ -91,6 +91,14 @@ # endif #endif +#if defined (HAVE_LLVM_LIBCXX) +/* The stream encoding facet from libc++ is stricter than libstdc++ when + it comes to reverting the stream. Disable encoding conversion for file + streams with libc++. + FIXME: Maybe use a more specific test. */ +# define OCTAVE_HAVE_STRICT_ENCODING_FACET 1 +#endif + /* Make all .oct file interpreter functions and methods static. */ #define OCTAVE_USE_STATIC_DEFUN
--- a/test/io.tst Thu Apr 06 09:29:24 2023 +0200 +++ b/test/io.tst Fri Apr 07 17:06:50 2023 +0200 @@ -665,7 +665,9 @@ %! unlink (nm); %! endif -%!test # write to and read from file with encoding +# write to and read from file with encoding +# FIXME: should be conditional on OCTAVE_HAVE_STRICT_ENCODING_FACET +%!testif ; ! __have_feature__ ("LLVM_LIBCXX") %! temp_file = [tempname(), ".txt"]; %! fid = fopen (temp_file, "wt", "n", "iso-8859-1"); %! unwind_protect @@ -962,7 +964,8 @@ %! endfor # stream with transcoding -%!test <*63930> +# FIXME: should be conditional on OCTAVE_HAVE_STRICT_ENCODING_FACET +%!testif ; ! __have_feature__ ("LLVM_LIBCXX") <*63930> %! w_modes = {"wb", "wt"}; %! # 64 non-ASCII characters that can be represented in 'windows-1252' %! f_texts{1} = repmat ('ÀÂÈÊÌàäéèêìîöòùû', 1, 4);