changeset 32049:1c99c8f020f7

gui: Show encodings available with iconv in file editor preferences. * liboctave/wrappers/iconv-wrappers.h, liboctave/wrappers/iconv-wrappers.c (octave_iconvlist_wrapper, octave_iconv_canonicalize_wrapper): Add wrappers for libiconv functions. * liboctave/util/oct-string.h, liboctave/util/oct-string.cc (octave::string::get_encoding_list): Add new function that returns an ordered list of canonicalized encoding names that are available from libiconv. Use list of encoding identifiers as fallback on platforms without the required functions. * libgui/src/gui-settings.cc, libgui/src/gui-settings.h (octave::gui_settings::get_codecs): Remove function. (octave::gui_settings::combo_encoding): Show list of encoding names that are actually available to the interpreter instead of a list of encodings available in Qt. * libgui/src/qt-interpreter-events.cc (octave::qt_interpreter_events::gui_preference_adjust): Remove logic for mapping between Qt encoding names and iconv encoding names. * libgui/src/settings-dialog.cc: Remove unused header. * m4/acinclude.m4 (OCTAVE_CHECK_ICONVLIST, OCTAVE_CHECK_ICONV_CANONICALIZE): Add checks for functions from libiconv that are not available on all platforms. * configure.ac: Call new functions.
author Markus Mützel <markus.muetzel@gmx.de>
date Sat, 22 Apr 2023 19:01:35 +0200
parents 61db3c9377fb
children c00d842608f7
files configure.ac libgui/src/gui-settings.cc libgui/src/gui-settings.h libgui/src/qt-interpreter-events.cc libgui/src/settings-dialog.cc liboctave/util/oct-string.cc liboctave/util/oct-string.h liboctave/wrappers/iconv-wrappers.c liboctave/wrappers/iconv-wrappers.h m4/acinclude.m4
diffstat 10 files changed, 264 insertions(+), 61 deletions(-) [+]
line wrap: on
line diff
--- a/configure.ac	Fri Apr 21 14:49:23 2023 -0400
+++ b/configure.ac	Sat Apr 22 19:01:35 2023 +0200
@@ -1497,6 +1497,12 @@
 
 OCTAVE_ENABLE_READLINE
 
+### Check whether functions from libiconv are available.
+
+OCTAVE_CHECK_ICONVLIST
+
+OCTAVE_CHECK_ICONV_CANONICALIZE
+
 ### Check for ZLIB library.
 
 OCTAVE_CHECK_LIB(z, ZLIB,
--- a/libgui/src/gui-settings.cc	Fri Apr 21 14:49:23 2023 -0400
+++ b/libgui/src/gui-settings.cc	Sat Apr 22 19:01:35 2023 +0200
@@ -50,7 +50,6 @@
 #include <QShortcut>
 #include <QString>
 #include <QStringList>
-#include <QTextCodec>
 #include <QTranslator>
 
 #include "gui-preferences-cs.h"
@@ -61,6 +60,7 @@
 
 #include "localcharset-wrapper.h"
 #include "oct-env.h"
+#include "oct-string.h"
 
 #include "defaults.h"
 
@@ -636,35 +636,20 @@
   sys::env::putenv ("HTTPS_PROXY", proxy_url_str);
 }
 
-// get a list of all available encodings
-void gui_settings::get_codecs (QStringList *codecs)
-{
-  // get the codec name for each mib
-  QList<int> all_mibs = QTextCodec::availableMibs ();
-  for (auto mib : all_mibs)
-    {
-      QTextCodec *c = QTextCodec::codecForMib (mib);
-      codecs->append (c->name ().toUpper ());
-    }
-
-  // Append SYSTEM
-  codecs->append (QString ("SYSTEM (") +
-                  QString (octave_locale_charset_wrapper ()).toUpper () +
-                  QString (")"));
-
-  // Clean up and sort list of codecs
-  codecs->removeDuplicates ();
-  std::sort (codecs->begin (), codecs->end ());
-}
-
 // initialize a given combo box with available text encodings
 void gui_settings::combo_encoding (QComboBox *combo, const QString& current)
 {
-  QStringList all_codecs;
-  get_codecs (&all_codecs);
+  std::vector<std::string> encoding_list {string::get_encoding_list ()};
+
+  // prepend SYSTEM
+  std::string locale_charset {octave_locale_charset_wrapper ()};
+  std::transform (locale_charset.begin (), locale_charset.end (),
+                  locale_charset.begin (), ::toupper);
+  locale_charset = "SYSTEM (" + locale_charset + ")";
+  encoding_list.insert (encoding_list.begin (), locale_charset);
 
   // get the value from the settings file if no current encoding is given
-  QString enc = current;
+  QString enc {current};
 
   // Check for valid codec for the default.  If this fails, "SYSTEM" (i.e.
   // locale_charset) will be chosen.
@@ -674,12 +659,12 @@
   bool show_system = false;
   if (ed_default_enc.def ().toString ().startsWith ("SYSTEM"))
     show_system = true;
-  else if (QTextCodec::codecForName (ed_default_enc.def ().toString ().toLatin1 ()))
+  else if (std::find (encoding_list.begin (), encoding_list.end (),
+                      ed_default_enc.def ().toString ().toStdString ())
+           != encoding_list.end ())
     default_exists = true;
 
-  QString default_enc =
-    QString ("SYSTEM (") +
-    QString (octave_locale_charset_wrapper ()).toUpper () + QString (")");
+  QString default_enc = QString::fromStdString (locale_charset);
 
   if (enc.isEmpty ())
     {
@@ -695,8 +680,12 @@
     }
 
   // fill the combo box
-  for (const auto& c : all_codecs)
-    combo->addItem (c);
+  for (const auto& c : encoding_list)
+    combo->addItem (QString::fromStdString (c));
+
+  // prepend current encoding if not in list
+  if (combo->findText (enc, Qt::MatchExactly) < 0)
+    combo->insertItem (0, enc);
 
   // prepend the default item
   combo->insertSeparator (0);
@@ -705,12 +694,8 @@
   else
     combo->insertItem (0, ed_default_enc.def ().toString ());
 
-  // select the default or the current one
-  int idx = combo->findText (enc, Qt::MatchExactly);
-  if (idx >= 0)
-    combo->setCurrentIndex (idx);
-  else
-    combo->setCurrentIndex (0);
+  // select the current encoding
+  combo->setCurrentIndex (combo->findText (enc, Qt::MatchExactly));
 
   combo->setMaxVisibleItems (12);
 }
--- a/libgui/src/gui-settings.h	Fri Apr 21 14:49:23 2023 -0400
+++ b/libgui/src/gui-settings.h	Sat Apr 22 19:01:35 2023 +0200
@@ -203,8 +203,6 @@
 
   void update_network_settings ();
 
-  void get_codecs (QStringList *codecs);
-
   void combo_encoding (QComboBox *combo, const QString& current = QString ());
 
   void reload ();
--- a/libgui/src/qt-interpreter-events.cc	Fri Apr 21 14:49:23 2023 -0400
+++ b/libgui/src/qt-interpreter-events.cc	Sat Apr 22 19:01:35 2023 +0200
@@ -718,36 +718,15 @@
 
   QString adjusted_value = value;
 
-  // Not all encodings are available.  Encodings are uppercase and do
-  // not use CPxxx but IBMxxx or WINDOWS-xxx.
-
   if (key == ed_default_enc.settings_key ())
     {
       adjusted_value = adjusted_value.toUpper ();
 
-      gui_settings settings;
-      QStringList codecs;
-      settings.get_codecs (&codecs);
-
-      QRegularExpression re {"^CP(\\d+)$"};
-      QRegularExpressionMatch match = re.match (adjusted_value);
-
       if (adjusted_value == "SYSTEM")
         adjusted_value =
           QString ("SYSTEM (") +
           QString (octave_locale_charset_wrapper ()).toUpper () +
           QString (")");
-      else if (match.hasMatch ())
-        {
-          if (codecs.contains ("IBM" + match.captured (1)))
-            adjusted_value = "IBM" + match.captured (1);
-          else if (codecs.contains ("WINDOWS-" + match.captured (1)))
-            adjusted_value = "WINDOWS-" + match.captured (1);
-          else
-            adjusted_value.clear ();
-        }
-      else if (! codecs.contains (adjusted_value))
-        adjusted_value.clear ();
     }
 
   return adjusted_value;
--- a/libgui/src/settings-dialog.cc	Fri Apr 21 14:49:23 2023 -0400
+++ b/libgui/src/settings-dialog.cc	Sat Apr 22 19:01:35 2023 +0200
@@ -39,7 +39,6 @@
 #include <QMessageBox>
 #include <QScrollBar>
 #include <QStyleFactory>
-#include <QTextCodec>
 #include <QThread>
 #include <QVector>
 
--- a/liboctave/util/oct-string.cc	Fri Apr 21 14:49:23 2023 -0400
+++ b/liboctave/util/oct-string.cc	Sat Apr 22 19:01:35 2023 +0200
@@ -34,8 +34,10 @@
 #include <cstring>
 #include <iomanip>
 #include <string>
+#include <unordered_set>
 
 #include "Array.h"
+#include "iconv-wrappers.h"
 #include "lo-ieee.h"
 #include "lo-mappers.h"
 #include "uniconv-wrappers.h"
@@ -639,6 +641,142 @@
   return retval;
 }
 
+// Return a sorted list with the names of the encodings that can be selected.
+//
+// If iconvlist is available, the names that it reports are passed through
+// octave_iconv_canonicalize_wrapper and duplicates are dropped.  Otherwise, a
+// hardcoded list of encoding identifiers is used as a fallback.
+//
+// The list is built on the first call and cached in a function-local static.
+// Each call returns a copy of the cached list.
+std::vector<std::string>
+octave::string::get_encoding_list ()
+{
+  static std::vector<std::string> encoding_list;
+
+  if (encoding_list.empty ())
+    {
+#if defined (HAVE_ICONVLIST)
+      // Use unordered_set to skip aliases that canonicalize to the same name.
+      // No check for HAVE_ICONV_CANONICALIZE is needed here: without
+      // iconv_canonicalize, the wrapper returns each name unchanged.
+      std::unordered_set<std::string> encoding_set;
+
+      // Collect the names in a single pass.  According to the libiconv
+      // documentation, the callback is invoked once per encoding with all of
+      // its names, and a return value of 0 continues the iteration.
+      octave_iconvlist_wrapper (
+        [] (unsigned int num, const char * const *names, void *data) -> int
+          {
+            std::unordered_set<std::string> *encoding_set_ptr
+              = static_cast<std::unordered_set<std::string> *> (data);
+            for (std::size_t i = 0; i < num; i++)
+              {
+                const char *canonicalized_enc
+                  = octave_iconv_canonicalize_wrapper (names[i]);
+
+                // Constructing a std::string from a null pointer is undefined.
+                if (canonicalized_enc)
+                  encoding_set_ptr->insert (canonicalized_enc);
+              }
+            return 0;
+          },
+        &encoding_set);
+
+      encoding_list.assign (encoding_set.begin (), encoding_set.end ());
+
+      // Offer at least UTF-8 if no encoding was reported.  Leaving the list
+      // empty would also trigger a rebuild on every call.
+      if (encoding_list.empty ())
+        encoding_list.push_back ("UTF-8");
+
+#else
+      // Use hardcoded list of encodings as a fallback for platforms without
+      // iconvlist (or another way of programmatically querying a list of
+      // supported encodings).
+      // This list is inspired by the encodings supported by Geany.
+      encoding_list
+        = {"ISO-8859-1",
+           "ISO-8859-2",
+           "ISO-8859-3",
+           "ISO-8859-4",
+           "ISO-8859-5",
+           "ISO-8859-6",
+           "ISO-8859-7",
+           "ISO-8859-8",
+           "ISO-8859-9",
+           "ISO-8859-10",
+           "ISO-8859-13",
+           "ISO-8859-14",
+           "ISO-8859-15",
+           "ISO-8859-16",
+
+           "UTF-7",
+           "UTF-8",
+           "UTF-16LE",
+           "UTF-16BE",
+           "UTF-32LE",
+           "UTF-32BE",
+           "UCS-2LE",
+           "UCS-2BE",
+
+           "ARMSCII-8",
+           "BIG5",
+           "BIG5-HKSCS",
+           "CP866",
+
+           "EUC-JP",
+           "EUC-KR",
+           "EUC-TW",
+
+           "GB18030",
+           "GB_2312-80",
+           "GBK",
+           "HZ",
+
+           "IBM850",
+           "IBM852",
+           "IBM855",
+           "IBM857",
+           "IBM862",
+           "IBM864",
+
+           "ISO-2022-JP",
+           "ISO-2022-KR",
+           "JOHAB",
+           "KOI8-R",
+           "KOI8-U",
+
+           "SHIFT_JIS",
+           "TCVN",
+           "TIS-620",
+           "UHC",
+           "VISCII",
+
+           "CP1250",
+           "CP1251",
+           "CP1252",
+           "CP1253",
+           "CP1254",
+           "CP1255",
+           "CP1256",
+           "CP1257",
+           "CP1258",
+
+           "CP932"
+           };
+
+      // FIXME: Should we check whether those are actually valid encoding
+      // identifiers?
+#endif
+
+      // sort list of encodings
+      std::sort (encoding_list.begin (), encoding_list.end ());
+    }
+
+  return encoding_list;
+}
+
 typedef octave::string::codecvt_u8::InternT InternT;
 typedef octave::string::codecvt_u8::ExternT ExternT;
 typedef octave::string::codecvt_u8::StateT StateT;
--- a/liboctave/util/oct-string.h	Fri Apr 21 14:49:23 2023 -0400
+++ b/liboctave/util/oct-string.h	Sat Apr 22 19:01:35 2023 +0200
@@ -29,6 +29,7 @@
 #include "octave-config.h"
 
 #include <locale>
+#include <vector>
 
 #include "oct-cmplx.h"
 
@@ -162,6 +163,9 @@
 u16_to_encoding (const std::string& who, const std::u16string& u16_string,
                  const std::string& encoding);
 
+extern OCTAVE_API std::vector<std::string>
+get_encoding_list ();
+
 template<class Facet>
 struct deletable_facet : Facet
 {
--- a/liboctave/wrappers/iconv-wrappers.c	Fri Apr 21 14:49:23 2023 -0400
+++ b/liboctave/wrappers/iconv-wrappers.c	Sat Apr 22 19:01:35 2023 +0200
@@ -42,3 +42,26 @@
 {
   return iconv_close ((iconv_t) cd);
 }
+
+/* Pass DO_ONE and DATA on to iconvlist.  This function does nothing if
+   iconvlist is not available (HAVE_ICONVLIST is not defined).  */
+void
+octave_iconvlist_wrapper (int (*do_one) (unsigned int namescount,
+                                         const char * const *names,
+                                         void *data),
+                          void *data)
+{
+#if defined (HAVE_ICONVLIST)
+  iconvlist (do_one, data);
+#endif
+}
+
+/* Canonicalize NAME; return NAME as is without iconv_canonicalize.  */
+const char *
+octave_iconv_canonicalize_wrapper (const char *name)
+{
+#if defined (HAVE_ICONV_CANONICALIZE)
+  name = iconv_canonicalize (name);
+#endif
+  return name;
+}
--- a/liboctave/wrappers/iconv-wrappers.h	Fri Apr 21 14:49:23 2023 -0400
+++ b/liboctave/wrappers/iconv-wrappers.h	Sat Apr 22 19:01:35 2023 +0200
@@ -35,6 +35,15 @@
 
 extern OCTAVE_API int octave_iconv_close_wrapper (void *cd);
 
+extern OCTAVE_API void
+octave_iconvlist_wrapper (int (*do_one) (unsigned int namescount,
+                                         const char * const *names,
+                                         void *data),
+                          void *data);
+
+extern OCTAVE_API const char *
+octave_iconv_canonicalize_wrapper (const char *name);
+
 #if defined __cplusplus
 }
 #endif
--- a/m4/acinclude.m4	Fri Apr 21 14:49:23 2023 -0400
+++ b/m4/acinclude.m4	Sat Apr 22 19:01:35 2023 +0200
@@ -1256,6 +1256,68 @@
   fi
 ])
 dnl
+dnl Define HAVE_ICONVLIST if iconvlist links with $LIBICONV (tested as C++).
+dnl
+AC_DEFUN([OCTAVE_CHECK_ICONVLIST], [
+  AC_CACHE_CHECK([whether the function iconvlist is available],
+    [octave_cv_iconvlist],
+    [ac_octave_save_LIBS="$LIBS"
+    LIBS="$LIBICONV $LIBS"
+    AC_LANG_PUSH(C++)
+    AC_LINK_IFELSE([AC_LANG_PROGRAM([[
+        #if HAVE_ICONV
+        extern "C"
+        {
+        #  include <iconv.h>
+        }
+        #endif
+        ]], [[
+        iconvlist (
+           [] (unsigned int, const char * const *, void *) -> int
+             {
+               return 0;
+             },
+           nullptr);
+        ]])],
+      octave_cv_iconvlist=yes,
+      octave_cv_iconvlist=no)
+    AC_LANG_POP(C++)
+    LIBS="$ac_octave_save_LIBS"
+  ])
+  if test $octave_cv_iconvlist = yes; then
+    AC_DEFINE(HAVE_ICONVLIST, 1, [Define to 1 if iconvlist is available.])
+  fi
+])
+dnl
+dnl Define HAVE_ICONV_CANONICALIZE if iconv_canonicalize links with $LIBICONV.
+dnl
+AC_DEFUN([OCTAVE_CHECK_ICONV_CANONICALIZE], [
+  AC_CACHE_CHECK([whether the function iconv_canonicalize is available],
+    [octave_cv_iconv_canonicalize],
+    [ac_octave_save_LIBS="$LIBS"
+    LIBS="$LIBICONV $LIBS"
+    AC_LANG_PUSH(C++)
+    AC_LINK_IFELSE([AC_LANG_PROGRAM([[
+        #if HAVE_ICONV
+        extern "C"
+        {
+        #  include <iconv.h>
+        }
+        #endif
+        ]], [[
+        iconv_canonicalize ("UTF-8");
+        ]])],
+      octave_cv_iconv_canonicalize=yes,
+      octave_cv_iconv_canonicalize=no)
+    AC_LANG_POP(C++)
+    LIBS="$ac_octave_save_LIBS"
+  ])
+  if test $octave_cv_iconv_canonicalize = yes; then
+    AC_DEFINE(HAVE_ICONV_CANONICALIZE, 1,
+      [Define to 1 if iconv_canonicalize is available.])
+  fi
+])
+dnl
 dnl Check whether using HDF5 DLL under Windows.  This is done by
 dnl testing for a data symbol in the HDF5 library, which would
 dnl require the definition of _HDF5USEDL_ under MSVC compiler.