changeset 32023:bce1850f8104

gui: Use iconv/gnulib to load editor content from files in any encoding. * bootstrap.conf: Add module "uniconv/u16-conv-from-enc". * liboctave/wrappers/uniconv-wrappers.c, liboctave/wrappers/uniconv-wrappers.h (octave_u16_conv_from_encoding, octave_u16_conv_from_encoding_strict): Add wrappers for gnulib function. * liboctave/util/oct-string.cc, liboctave/util/oct-string.h (octave::string::u16_to_encoding): Add new function. * libgui/src/m-editor/file-editor-tab.h: Remove header that is no longer needed. * libgui/src/m-editor/file-editor-tab.cc (octave::file_editor_tab::load_file): Use iconv/gnulib functions to load file editor content from file in any encoding. The interpreter needs to be able to read these files using the iconv/gnulib functions anyway. And Qt6 can only convert from UTF-8, UTF-16, UTF-32 or the system locale encoding.
author Markus Mützel <markus.muetzel@gmx.de>
date Sat, 15 Apr 2023 20:51:50 +0200
parents 5945e1bd73ea
children 6db223403fdb
files bootstrap.conf libgui/src/m-editor/file-editor-tab.cc libgui/src/m-editor/file-editor-tab.h liboctave/wrappers/uniconv-wrappers.c liboctave/wrappers/uniconv-wrappers.h
diffstat 5 files changed, 72 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/bootstrap.conf	Sat Apr 15 07:17:15 2023 -0700
+++ b/bootstrap.conf	Sat Apr 15 20:51:50 2023 +0200
@@ -105,6 +105,7 @@
   uname
   unicase/u8-tolower
   unicase/u8-toupper
+  uniconv/u16-conv-from-enc
   uniconv/u16-conv-to-enc
   uniconv/u32-conv-to-enc
   uniconv/u8-conv-from-enc
--- a/libgui/src/m-editor/file-editor-tab.cc	Sat Apr 15 07:17:15 2023 -0700
+++ b/libgui/src/m-editor/file-editor-tab.cc	Sat Apr 15 20:51:50 2023 +0200
@@ -1881,21 +1881,50 @@
         text_data.chop (1);
     }
 
-  // decode
-  QTextCodec::ConverterState st;
-  QTextCodec *codec = QTextCodec::codecForName (m_encoding.toLatin1 ());
-  if (codec == nullptr)
-    codec = QTextCodec::codecForLocale ();
-
-  const QString text = codec->toUnicode(text_data.constData(),
-                                        text_data.size(), &st);
-
-  // Decoding with invalid characters?
-  if (st.invalidChars > 0)
+  // expected file encoding
+  std::string encoding = m_encoding.toStdString ();
+  if (encoding.compare (0, 6, "SYSTEM") == 0)
+    encoding = octave_locale_charset_wrapper ();
+
+  // check if the selected encoding can be used to decode the file
+
+  const char *src = text_data.constData ();
+  std::size_t srclen = text_data.length ();
+
+  std::size_t length;
+  uint16_t *u16_str;
+
+  // try to convert encoding in strict mode
+  u16_str = octave_u16_conv_from_encoding_strict (encoding.c_str (),
+                                                  src, srclen, &length);
+
+  // check for invalid characters in input file
+  if (! u16_str)
     {
       // Set read only
       m_edit_area->setReadOnly (true);
 
+      // convert encoding allowing replacements
+      u16_str = octave_u16_conv_from_encoding (encoding.c_str (),
+                                               src, srclen, &length);
+
+      if (! u16_str)
+        {
+          // FIXME: Can this ever happen?
+
+          // non-modal error message box
+          QMessageBox *msgBox
+            = new QMessageBox (QMessageBox::Critical,
+                               tr ("Octave Editor"),
+                               tr ("Unable to read file '%1'\n"
+                                   "with selected encoding '%2': %3")
+                                  .arg (file_to_load).arg (m_encoding)
+                                  .arg (std::strerror (errno)),
+                               QMessageBox::Ok, nullptr);
+          show_dialog (msgBox, false);
+          return QString ();
+        }
+
       // Message box for user decision
       QString msg = tr ("There were problems reading the file\n"
                         "%1\n"
@@ -1919,13 +1948,18 @@
       msg_box->show ();
     }
 
+  unwind_action free_u16_str ([=] () { ::free (u16_str); });
+
+  QString text
+    = QString::fromUtf16 (reinterpret_cast<char16_t *> (u16_str), length);
+
   m_edit_area->setText (text);
   m_edit_area->setEolMode (detect_eol_mode ());
 
   QApplication::restoreOverrideCursor ();
 
-  m_copy_available = false;     // no selection yet available
-  m_edit_area->setModified (false); // loaded file is not modified yet
+  m_copy_available = false;  // no selection yet available
+  m_edit_area->setModified (false);  // loaded file is not modified yet
   set_file_name (file_to_load);
 
   update_eol_indicator ();
--- a/libgui/src/m-editor/file-editor-tab.h	Sat Apr 15 07:17:15 2023 -0700
+++ b/libgui/src/m-editor/file-editor-tab.h	Sat Apr 15 20:51:50 2023 +0200
@@ -33,7 +33,6 @@
 #include <QFileSystemWatcher>
 #include <QLabel>
 #include <QStatusBar>
-#include <QTextCodec>
 #include <QWidget>
 #include <Qsci/qsciapis.h>
 
--- a/liboctave/wrappers/uniconv-wrappers.c	Sat Apr 15 07:17:15 2023 -0700
+++ b/liboctave/wrappers/uniconv-wrappers.c	Sat Apr 15 20:51:50 2023 +0200
@@ -92,6 +92,22 @@
                                             src, srclen, NULL, lengthp);
 }
 
+uint16_t *
+octave_u16_conv_from_encoding (const char *fromcode, const char *src,
+                               size_t srclen, size_t *lengthp)
+{
+  return u16_conv_from_encoding (fromcode, iconveh_question_mark,
+                                 src, srclen, NULL, NULL, lengthp);
+}
+
+uint16_t *
+octave_u16_conv_from_encoding_strict (const char *fromcode, const char *src,
+                                      size_t srclen, size_t *lengthp)
+{
+  return u16_conv_from_encoding (fromcode, iconveh_error,
+                                 src, srclen, NULL, NULL, lengthp);
+}
+
 char *
 octave_u16_conv_to_encoding (const char *tocode, const uint16_t *src,
                              size_t srclen, size_t *lengthp)
--- a/liboctave/wrappers/uniconv-wrappers.h	Sat Apr 15 07:17:15 2023 -0700
+++ b/liboctave/wrappers/uniconv-wrappers.h	Sat Apr 15 20:51:50 2023 +0200
@@ -53,6 +53,14 @@
 octave_u8_conv_to_encoding_strict (const char *tocode, const uint8_t *src,
                                    size_t srclen, size_t *lengthp);
 
+extern OCTAVE_API uint16_t *
+octave_u16_conv_from_encoding (const char *fromcode, const char *src,
+                               size_t srclen, size_t *lengthp);
+
+extern OCTAVE_API uint16_t *
+octave_u16_conv_from_encoding_strict (const char *fromcode, const char *src,
+                                      size_t srclen, size_t *lengthp);
+
 extern OCTAVE_API char *
 octave_u16_conv_to_encoding (const char *tocode, const uint16_t *src,
                              size_t srclen, size_t *lengthp);