changeset 32021:da2954782945

gui: Use iconv/gnulib to save editor content in non-UTF-8 encodings. * bootstrap.conf: Add module "uniconv/u16-conv-to-enc". * liboctave/wrappers/uniconv-wrappers.c, liboctave/wrappers/uniconv-wrappers.h (octave_u16_conv_to_encoding, octave_u16_conv_to_encoding_strict): Add wrappers for gnulib function. * liboctave/util/oct-string.cc, liboctave/util/oct-string.h (octave::string::u16_to_encoding): Add new function. * libgui/src/m-editor/file-editor-tab.h (octave::file_editor_tab::check_valid_codec): Change return type of function. * libgui/src/m-editor/file-editor-tab.cc (octave::file_editor_tab::do_save_file): Use iconv/gnulib functions to save file editor content in non-UTF-8 encodings. The interpreter needs to be able to read these files using the iconv/gnulib functions anyway, and Qt6 can only convert to UTF-8, UTF-16, UTF-32 or the system locale encoding. (octave::file_editor_tab::check_valid_codec): Use iconv/gnulib facilities to check if the selected encoding is valid for file editor content. (octave::file_editor_tab::save_file): Adjust for changed return type of check_valid_codec.
author Markus Mützel <markus.muetzel@gmx.de>
date Sat, 15 Apr 2023 11:29:15 +0200
parents a5413268eb47
children 5945e1bd73ea
files bootstrap.conf libgui/src/m-editor/file-editor-tab.cc libgui/src/m-editor/file-editor-tab.h liboctave/util/oct-string.cc liboctave/util/oct-string.h liboctave/wrappers/uniconv-wrappers.c liboctave/wrappers/uniconv-wrappers.h
diffstat 7 files changed, 162 insertions(+), 68 deletions(-) [+]
line wrap: on
line diff
--- a/bootstrap.conf	Fri Apr 14 21:24:06 2023 -0400
+++ b/bootstrap.conf	Sat Apr 15 11:29:15 2023 +0200
@@ -105,6 +105,7 @@
   uname
   unicase/u8-tolower
   unicase/u8-toupper
+  uniconv/u16-conv-to-enc
   uniconv/u32-conv-to-enc
   uniconv/u8-conv-from-enc
   uniconv/u8-conv-to-enc
--- a/libgui/src/m-editor/file-editor-tab.cc	Fri Apr 14 21:24:06 2023 -0400
+++ b/libgui/src/m-editor/file-editor-tab.cc	Sat Apr 15 11:29:15 2023 +0200
@@ -80,6 +80,7 @@
 
 #include "cmd-edit.h"
 #include "file-ops.h"
+#include "iconv-wrappers.h"
 #include "localcharset-wrapper.h"
 #include "uniconv-wrappers.h"
 
@@ -2182,9 +2183,8 @@
 
   m_encoding = m_new_encoding;    // consider a possible new encoding
 
-  // set the desired codec (if suitable for contents)
-  QTextCodec *codec = check_valid_codec ();
-  if (! codec)
+  // check if the selected encoding is suitable for the content
+  if (! check_valid_codec ())
     return;   // No valid codec
 
   // Get a list of breakpoint line numbers, before exiting debug mode
@@ -2346,31 +2346,72 @@
       return;
     }
 
-  // save the contents into the file
-
-  // write the file
-  QTextStream out (&file);
+  // target encoding
+  std::string encoding = m_encoding.toStdString ();
+  if (encoding.compare (0, 6, "SYSTEM") == 0)
+    encoding = octave_locale_charset_wrapper ();
+
+  // check if selected encoding is suitable for contents
+  if (! check_valid_codec ())
+    {
+      // begin watching file again if it was being watched previously
+      if (trackedFiles.contains (file_to_save))
+        m_file_system_watcher.addPath (file_to_save);
+
+      return;  // no valid codec
+    }
+
+  // save contents of editor in file
+
+  QApplication::setOverrideCursor (Qt::WaitCursor);
+
+  if (encoding == "utf-8" || encoding == "UTF-8")
+    {
+      // use Qt encoding conversion for UTF-8
+      QTextStream out (&file);
 
 #if HAVE_QTEXTSTREAM_SETENCODING
-  // FIXME: Check and set encoding!
+      out.setEncoding (QStringConverter::Utf8);
 #else
-  // set the desired codec (if suitable for contents)
-  QTextCodec *codec = check_valid_codec ();
-  if (! codec)
-    return;   // No valid codec
-
-  // Save the file
-  out.setCodec (codec);
+      out.setCodec ("UTF-8");
 #endif
 
-  QApplication::setOverrideCursor (Qt::WaitCursor);
-
-  out << m_edit_area->text ();
-  if (settings.bool_value (ed_force_newline)
-      && m_edit_area->text ().length ())
-    out << m_edit_area->eol_string ();   // Add newline if desired
-
-  out.flush ();
+      out << m_edit_area->text ();
+
+      // add newline if desired
+      if (settings.bool_value (ed_force_newline)
+          && m_edit_area->text ().length ())
+        out << m_edit_area->eol_string ();
+
+      out.flush ();
+    }
+  else
+    {
+      // use iconv/gnulib for all other output encodings
+      QDataStream out (&file);
+
+      // get natively UTF-16 encoded content of the QString as STL type
+      std::u16string u16_string = m_edit_area->text ().toStdU16String ();
+
+      // convert to output encoding
+      std::string native_string
+        = string::u16_to_encoding ("file editor", u16_string, encoding);
+
+      // save file
+      out.writeRawData (native_string.c_str (), native_string.size ());
+
+      // add newline if desired
+      if (settings.bool_value (ed_force_newline)
+          && m_edit_area->text ().length ())
+        {
+          std::u16string u16_newline
+            = m_edit_area->eol_string ().toStdU16String ();
+          std::string newline
+            = string::u16_to_encoding ("file editor", u16_newline, encoding);
+          out.writeRawData (newline.c_str (), newline.size ());
+        }
+    }
+
   QApplication::restoreOverrideCursor ();
 
   // Finish writing by committing the changes to disk,
@@ -2550,50 +2591,45 @@
   return false;
 }
 
-QTextCodec* file_editor_tab::check_valid_codec ()
+bool file_editor_tab::check_valid_codec ()
 {
-  QTextCodec *codec = QTextCodec::codecForName (m_encoding.toLatin1 ());
-
-  // "SYSTEM" is used as alias for the locale encoding.
-  if ((! codec) && m_encoding.startsWith("SYSTEM"))
-    codec = QTextCodec::codecForLocale ();
-
-  if (! codec)
-    {
-      QMessageBox::critical (nullptr,
-                             tr ("Octave Editor"),
-                             tr ("The current encoding %1\n"
-                                 "can not be applied.\n\n"
-                                 "Please select another one!").arg (m_encoding));
-
-      return nullptr;
-    }
-
   QString editor_text = m_edit_area->text ();
-  bool can_encode = codec->canEncode (editor_text);
-
-  // We cannot rely on QTextCodec::canEncode because it uses the
-  // ConverterState of convertFromUnicode which isn't updated by some
-  // implementations.
-  if (can_encode)
+
+  // target encoding
+  std::string encoding = m_encoding.toStdString ();
+  if (encoding.compare (0, 6, "SYSTEM") == 0)
+    encoding = octave_locale_charset_wrapper ();
+
+  if (encoding == "UTF-8" || encoding == "utf-8")
+    return true;
+
+  // check if encoding is valid
+  void *codec = octave_iconv_open_wrapper (encoding.c_str (), "utf-8");
+  if (codec == reinterpret_cast<void *> (-1))
     {
-      QVector<uint> u32_str = editor_text.toUcs4 ();
-      const uint32_t *src = reinterpret_cast<const uint32_t *>
-                            (u32_str.data ());
-
-      std::size_t length;
-      const std::string encoding = m_encoding.toStdString ();
-      char *res_str =
-        octave_u32_conv_to_encoding_strict (encoding.c_str (), src,
-                                            u32_str.size (), &length);
-      if (! res_str)
-        {
-          if (errno == EILSEQ)
-            can_encode = false;
-        }
-      else
-        ::free (static_cast<void *> (res_str));
+      if (errno == EINVAL)
+        return false;
     }
+  else
+    octave_iconv_close_wrapper (codec);
+
+  // check if all characters in the editor can be encoded in the target encoding
+  bool can_encode = true;
+  std::u16string u16_str = editor_text.toStdU16String ();
+  const uint16_t *src = reinterpret_cast<const uint16_t *>
+                        (u16_str.c_str ());
+
+  std::size_t length;
+  char *res_str =
+    octave_u16_conv_to_encoding_strict (encoding.c_str (), src,
+                                        u16_str.size (), &length);
+  if (! res_str)
+    {
+      if (errno == EILSEQ)
+        can_encode = false;
+    }
+  else
+    ::free (static_cast<void *> (res_str));
 
   if (! can_encode)
     {
@@ -2608,12 +2644,10 @@
                                  QMessageBox::Cancel);
 
       if (pressed_button == QMessageBox::Ignore)
-        return codec;
-      else
-        return nullptr;
+        can_encode = true;
     }
 
-  return codec;
+  return can_encode;
 }
 
 void file_editor_tab::handle_save_file_as_answer (const QString& save_file_name)
--- a/libgui/src/m-editor/file-editor-tab.h	Fri Apr 14 21:24:06 2023 -0400
+++ b/libgui/src/m-editor/file-editor-tab.h	Sat Apr 15 11:29:15 2023 +0200
@@ -266,7 +266,7 @@
                   bool restore_breakpoints = true);
   void save_file_as (bool remove_on_success = false);
   bool check_valid_identifier (QString file_name);
-  QTextCodec * check_valid_codec ();
+  bool check_valid_codec ();
 
   bool unchanged_or_saved ();
 
--- a/liboctave/util/oct-string.cc	Fri Apr 14 21:24:06 2023 -0400
+++ b/liboctave/util/oct-string.cc	Sat Apr 15 11:29:15 2023 +0200
@@ -607,6 +607,38 @@
   return num_replacements;
 }
 
+std::string
+octave::string::u16_to_encoding (const std::string& who,
+                                 const std::u16string& u16_string,
+                                 const std::string& encoding)
+{
+  const uint16_t *src = reinterpret_cast<const uint16_t *>
+                        (u16_string.c_str ());
+  std::size_t srclen = u16_string.length ();
+
+  std::size_t length;
+  char *native_str = octave_u16_conv_to_encoding (encoding.c_str (), src,
+                                                  srclen, &length);
+
+  if (! native_str)
+    {
+      if (errno == ENOSYS)
+        (*current_liboctave_error_handler)
+          ("%s: iconv() is not supported. Installing GNU libiconv and then "
+           "re-compiling Octave could fix this.", who.c_str ());
+      else
+        (*current_liboctave_error_handler)
+          ("%s: converting from UTF-16 to codepage '%s' failed: %s",
+           who.c_str (), encoding.c_str (), std::strerror (errno));
+    }
+
+  octave::unwind_action free_native_str ([=] () { ::free (native_str); });
+
+  std::string retval = std::string (native_str, length);
+
+  return retval;
+}
+
 typedef octave::string::codecvt_u8::InternT InternT;
 typedef octave::string::codecvt_u8::ExternT ExternT;
 typedef octave::string::codecvt_u8::StateT StateT;
--- a/liboctave/util/oct-string.h	Fri Apr 14 21:24:06 2023 -0400
+++ b/liboctave/util/oct-string.h	Sat Apr 15 11:29:15 2023 +0200
@@ -158,6 +158,9 @@
 u8_validate (const std::string& who, std::string& in_string,
              const u8_fallback_type type = U8_REPLACEMENT_CHAR);
 
+extern OCTAVE_API std::string
+u16_to_encoding (const std::string& who, const std::u16string& u16_string,
+                 const std::string& encoding);
 
 template<class Facet>
 struct deletable_facet : Facet
--- a/liboctave/wrappers/uniconv-wrappers.c	Fri Apr 14 21:24:06 2023 -0400
+++ b/liboctave/wrappers/uniconv-wrappers.c	Sat Apr 15 11:29:15 2023 +0200
@@ -93,6 +93,22 @@
 }
 
 char *
+octave_u16_conv_to_encoding (const char *tocode, const uint16_t *src,
+                             size_t srclen, size_t *lengthp)
+{
+  return u16_conv_to_encoding (tocode, iconveh_question_mark,
+                               src, srclen, NULL, NULL, lengthp);
+}
+
+char *
+octave_u16_conv_to_encoding_strict (const char *tocode, const uint16_t *src,
+                                    size_t srclen, size_t *lengthp)
+{
+  return u16_conv_to_encoding (tocode, iconveh_error,
+                               src, srclen, NULL, NULL, lengthp);
+}
+
+char *
 octave_u32_conv_to_encoding_strict (const char *tocode, const uint32_t *src,
                                     size_t srclen, size_t *lengthp)
 {
--- a/liboctave/wrappers/uniconv-wrappers.h	Fri Apr 14 21:24:06 2023 -0400
+++ b/liboctave/wrappers/uniconv-wrappers.h	Sat Apr 15 11:29:15 2023 +0200
@@ -54,6 +54,14 @@
                                    size_t srclen, size_t *lengthp);
 
 extern OCTAVE_API char *
+octave_u16_conv_to_encoding (const char *tocode, const uint16_t *src,
+                             size_t srclen, size_t *lengthp);
+
+extern OCTAVE_API char *
+octave_u16_conv_to_encoding_strict (const char *tocode, const uint16_t *src,
+                                    size_t srclen, size_t *lengthp);
+
+extern OCTAVE_API char *
 octave_u32_conv_to_encoding_strict (const char *tocode, const uint32_t *src,
                                     size_t srclen, size_t *lengthp);