changeset 32026:36b6c5a77727

gui: Show encodings available with iconv in file editor preferences. * liboctave/wrappers/iconv-wrappers.h, liboctave/wrappers/iconv-wrappers.c (octave_iconvlist_wrapper, octave_iconv_canonicalize_wrapper): Add wrappers for iconv functions. * liboctave/util/oct-string.h, liboctave/util/oct-string.cc (octave::string::get_encoding_list): Add new function that returns an ordered list of canonicalized encoding names that are available from libiconv. * libgui/src/gui-settings.cc, libgui/src/gui-settings.h (octave::gui_settings::get_codecs): Remove function. (octave::gui_settings::combo_encoding): Show list of encoding names that are actually available to the interpreter instead of a list of encodings available in Qt. * libgui/src/qt-interpreter-events.cc (octave::qt_interpreter_events::gui_preference_adjust): Remove logic for mapping between Qt encoding names and iconv encoding names. * libgui/src/settings-dialog.cc: Remove unused header.
author Markus Mützel <markus.muetzel@gmx.de>
date Sun, 16 Apr 2023 15:16:33 +0200
parents 6db223403fdb
children 9918d52ee76a
files libgui/src/gui-settings.cc libgui/src/gui-settings.h libgui/src/qt-interpreter-events.cc libgui/src/settings-dialog.cc liboctave/util/oct-string.cc liboctave/util/oct-string.h liboctave/wrappers/iconv-wrappers.c liboctave/wrappers/iconv-wrappers.h
diffstat 8 files changed, 101 insertions(+), 55 deletions(-) [+]
line wrap: on
line diff
--- a/libgui/src/gui-settings.cc	Sun Apr 16 14:28:17 2023 +0200
+++ b/libgui/src/gui-settings.cc	Sun Apr 16 15:16:33 2023 +0200
@@ -50,7 +50,6 @@
 #include <QShortcut>
 #include <QString>
 #include <QStringList>
-#include <QTextCodec>
 #include <QTranslator>
 
 #include "gui-preferences-cs.h"
@@ -61,6 +60,7 @@
 
 #include "localcharset-wrapper.h"
 #include "oct-env.h"
+#include "oct-string.h"
 
 #include "defaults.h"
 
@@ -636,35 +636,20 @@
   sys::env::putenv ("HTTPS_PROXY", proxy_url_str);
 }
 
-// get a list of all available encodings
-void gui_settings::get_codecs (QStringList *codecs)
-{
-  // get the codec name for each mib
-  QList<int> all_mibs = QTextCodec::availableMibs ();
-  for (auto mib : all_mibs)
-    {
-      QTextCodec *c = QTextCodec::codecForMib (mib);
-      codecs->append (c->name ().toUpper ());
-    }
-
-  // Append SYSTEM
-  codecs->append (QString ("SYSTEM (") +
-                  QString (octave_locale_charset_wrapper ()).toUpper () +
-                  QString (")"));
-
-  // Clean up and sort list of codecs
-  codecs->removeDuplicates ();
-  std::sort (codecs->begin (), codecs->end ());
-}
-
 // initialize a given combo box with available text encodings
 void gui_settings::combo_encoding (QComboBox *combo, const QString& current)
 {
-  QStringList all_codecs;
-  get_codecs (&all_codecs);
+  std::vector<std::string> encoding_list {string::get_encoding_list ()};
+
+  // prepend SYSTEM
+  std::string locale_charset {octave_locale_charset_wrapper ()};
+  std::transform (locale_charset.begin (), locale_charset.end (),
+                  locale_charset.begin (), ::toupper);
+  locale_charset = "SYSTEM (" + locale_charset + ")";
+  encoding_list.insert (encoding_list.begin (), locale_charset);
 
   // get the value from the settings file if no current encoding is given
-  QString enc = current;
+  QString enc {current};
 
   // Check for valid codec for the default.  If this fails, "SYSTEM" (i.e.
   // locale_charset) will be chosen.
@@ -674,12 +659,12 @@
   bool show_system = false;
   if (ed_default_enc.def ().toString ().startsWith ("SYSTEM"))
     show_system = true;
-  else if (QTextCodec::codecForName (ed_default_enc.def ().toString ().toLatin1 ()))
+  else if (std::find (encoding_list.begin (), encoding_list.end (),
+                      ed_default_enc.def ().toString ().toStdString ())
+           != encoding_list.end ())
     default_exists = true;
 
-  QString default_enc =
-    QString ("SYSTEM (") +
-    QString (octave_locale_charset_wrapper ()).toUpper () + QString (")");
+  QString default_enc = QString::fromStdString (locale_charset);
 
   if (enc.isEmpty ())
     {
@@ -695,8 +680,8 @@
     }
 
   // fill the combo box
-  for (const auto& c : all_codecs)
-    combo->addItem (c);
+  for (const auto& c : encoding_list)
+    combo->addItem (QString::fromStdString (c));
 
   // prepend the default item
   combo->insertSeparator (0);
--- a/libgui/src/gui-settings.h	Sun Apr 16 14:28:17 2023 +0200
+++ b/libgui/src/gui-settings.h	Sun Apr 16 15:16:33 2023 +0200
@@ -203,8 +203,6 @@
 
   void update_network_settings ();
 
-  void get_codecs (QStringList *codecs);
-
   void combo_encoding (QComboBox *combo, const QString& current = QString ());
 
   void reload ();
--- a/libgui/src/qt-interpreter-events.cc	Sun Apr 16 14:28:17 2023 +0200
+++ b/libgui/src/qt-interpreter-events.cc	Sun Apr 16 15:16:33 2023 +0200
@@ -718,36 +718,15 @@
 
   QString adjusted_value = value;
 
-  // Not all encodings are available.  Encodings are uppercase and do
-  // not use CPxxx but IBMxxx or WINDOWS-xxx.
-
   if (key == ed_default_enc.settings_key ())
     {
       adjusted_value = adjusted_value.toUpper ();
 
-      gui_settings settings;
-      QStringList codecs;
-      settings.get_codecs (&codecs);
-
-      QRegularExpression re {"^CP(\\d+)$"};
-      QRegularExpressionMatch match = re.match (adjusted_value);
-
       if (adjusted_value == "SYSTEM")
         adjusted_value =
           QString ("SYSTEM (") +
           QString (octave_locale_charset_wrapper ()).toUpper () +
           QString (")");
-      else if (match.hasMatch ())
-        {
-          if (codecs.contains ("IBM" + match.captured (1)))
-            adjusted_value = "IBM" + match.captured (1);
-          else if (codecs.contains ("WINDOWS-" + match.captured (1)))
-            adjusted_value = "WINDOWS-" + match.captured (1);
-          else
-            adjusted_value.clear ();
-        }
-      else if (! codecs.contains (adjusted_value))
-        adjusted_value.clear ();
     }
 
   return adjusted_value;
--- a/libgui/src/settings-dialog.cc	Sun Apr 16 14:28:17 2023 +0200
+++ b/libgui/src/settings-dialog.cc	Sun Apr 16 15:16:33 2023 +0200
@@ -39,7 +39,6 @@
 #include <QMessageBox>
 #include <QScrollBar>
 #include <QStyleFactory>
-#include <QTextCodec>
 #include <QThread>
 #include <QVector>
 
--- a/liboctave/util/oct-string.cc	Sun Apr 16 14:28:17 2023 +0200
+++ b/liboctave/util/oct-string.cc	Sun Apr 16 15:16:33 2023 +0200
@@ -34,8 +34,10 @@
 #include <cstring>
 #include <iomanip>
 #include <string>
+#include <unordered_set>
 
 #include "Array.h"
+#include "iconv-wrappers.h"
 #include "lo-ieee.h"
 #include "lo-mappers.h"
 #include "uniconv-wrappers.h"
@@ -639,6 +641,60 @@
   return retval;
 }
 
+// Return the sorted list of canonical names of the encodings known to iconv.
+// The list is built on first use and cached; {"UTF-8"} is the fallback when
+// HAVE_ICONV is not defined or when iconv reports no encodings.
+std::vector<std::string>
+octave::string::get_encoding_list ()
+{
+  static std::vector<std::string> encoding_list;
+
+  if (encoding_list.empty ())
+    {
+#if defined (HAVE_ICONV)
+      // Count the encodings.  The callback is invoked once per encoding
+      // with the number of aliases of that encoding, so count the calls
+      // instead of storing the alias count of the last encoding.
+      std::size_t count = 0;
+      octave_iconvlist_wrapper (
+        [] (unsigned int, const char * const *, void *data) -> int
+          {
+            ++(*static_cast<std::size_t *> (data));
+            return 0;
+          },
+        &count);
+
+      // use unordered_set to skip canonicalized aliases
+      std::unordered_set<std::string> encoding_set;
+      encoding_set.reserve (count);
+
+      // populate set with the canonical name of every reported alias
+      octave_iconvlist_wrapper (
+        [] (unsigned int nm_count, const char * const *names, void *data) -> int
+          {
+            std::unordered_set<std::string> *encoding_set_ptr
+              = static_cast<std::unordered_set<std::string> *> (data);
+            for (std::size_t i = 0; i < nm_count; i++)
+              encoding_set_ptr
+                ->insert (octave_iconv_canonicalize_wrapper (names[i]));
+            return 0;
+          },
+        &encoding_set);
+
+      // sort list of encodings
+      encoding_list.assign (encoding_set.begin (), encoding_set.end ());
+      std::sort (encoding_list.begin (), encoding_list.end ());
+      // Never cache an empty list: it would be rebuilt on every call.
+      if (encoding_list.empty ())
+        encoding_list.push_back ("UTF-8");
+#else
+      encoding_list.push_back ("UTF-8");
+#endif
+    }
+
+  return encoding_list;
+}
+
 typedef octave::string::codecvt_u8::InternT InternT;
 typedef octave::string::codecvt_u8::ExternT ExternT;
 typedef octave::string::codecvt_u8::StateT StateT;
--- a/liboctave/util/oct-string.h	Sun Apr 16 14:28:17 2023 +0200
+++ b/liboctave/util/oct-string.h	Sun Apr 16 15:16:33 2023 +0200
@@ -29,6 +29,7 @@
 #include "octave-config.h"
 
 #include <locale>
+#include <vector>
 
 #include "oct-cmplx.h"
 
@@ -162,6 +163,9 @@
 u16_to_encoding (const std::string& who, const std::u16string& u16_string,
                  const std::string& encoding);
 
+extern OCTAVE_API std::vector<std::string>
+get_encoding_list ();
+
 template<class Facet>
 struct deletable_facet : Facet
 {
--- a/liboctave/wrappers/iconv-wrappers.c	Sun Apr 16 14:28:17 2023 +0200
+++ b/liboctave/wrappers/iconv-wrappers.c	Sun Apr 16 15:16:33 2023 +0200
@@ -42,3 +42,19 @@
 {
   return iconv_close ((iconv_t) cd);
 }
+
+// Pass DO_ONE/DATA straight to iconvlist (GNU libiconv only? -- confirm).
+void
+octave_iconvlist_wrapper (int (*do_one) (unsigned int namescount,
+                                         const char * const *names,
+                                         void *data),
+                          void *data)
+{
+  iconvlist (do_one, data);
+}
+
+const char *
+octave_iconv_canonicalize_wrapper (const char *name)
+{
+  return iconv_canonicalize (name);  // pass-through; pointer returned as-is
+}
--- a/liboctave/wrappers/iconv-wrappers.h	Sun Apr 16 14:28:17 2023 +0200
+++ b/liboctave/wrappers/iconv-wrappers.h	Sun Apr 16 15:16:33 2023 +0200
@@ -35,6 +35,15 @@
 
 extern OCTAVE_API int octave_iconv_close_wrapper (void *cd);
 
+extern OCTAVE_API void
+octave_iconvlist_wrapper (int (*do_one) (unsigned int namescount,
+                                         const char * const *names,
+                                         void *data),
+                          void *data);
+
+extern OCTAVE_API const char *
+octave_iconv_canonicalize_wrapper (const char *name);
+
 #if defined __cplusplus
 }
 #endif