# HG changeset patch # User Markus Mützel # Date 1682182895 -7200 # Node ID 1c99c8f020f788b23ee4c26dbc51a0985ca447da # Parent 61db3c9377fb4aae48cb641eccf63c949c1ec3f2 gui: Show encodings available with iconv in file editor preferences. * liboctave/wrappers/iconv-wrappers.h, liboctave/wrappers/iconv-wrappers.c (octave_iconvlist_wrapper, octave_iconv_canonicalize_wrapper): Add wrappers for libiconv functions. * liboctave/util/oct-string.h, liboctave/util/oct-string.cc (octave::string::get_encoding_list): Add new function that returns an ordered list of canonicalized encoding names that are available from libiconv. Use list of encoding identifiers as fallback on platforms without the required functions. * libgui/src/gui-settings.cc, libgui/src/gui-settings.h (octave::gui_settings::get_codecs): Remove function. (octave::gui_settings::combo_encoding): Show list of encoding names that are actually available to the interpreter instead of a list of encodings available in Qt. * libgui/src/qt-interpreter-events.cc (octave::qt_interpreter_events::gui_preference_adjust): Remove logic for mapping between Qt encoding names and iconv encoding names. * libgui/src/settings-dialog.cc: Remove unused header. * m4/acinclude.m4 (OCTAVE_CHECK_ICONVLIST, OCTAVE_CHECK_ICONV_CANONICALIZE): Add checks for functions from libiconv that are not available on all platforms. * configure.ac: Call new functions. diff -r 61db3c9377fb -r 1c99c8f020f7 configure.ac --- a/configure.ac Fri Apr 21 14:49:23 2023 -0400 +++ b/configure.ac Sat Apr 22 19:01:35 2023 +0200 @@ -1497,6 +1497,12 @@ OCTAVE_ENABLE_READLINE +### Check whether functions from libiconv are available. + +OCTAVE_CHECK_ICONVLIST + +OCTAVE_CHECK_ICONV_CANONICALIZE + ### Check for ZLIB library. 
OCTAVE_CHECK_LIB(z, ZLIB, diff -r 61db3c9377fb -r 1c99c8f020f7 libgui/src/gui-settings.cc --- a/libgui/src/gui-settings.cc Fri Apr 21 14:49:23 2023 -0400 +++ b/libgui/src/gui-settings.cc Sat Apr 22 19:01:35 2023 +0200 @@ -50,7 +50,6 @@ #include #include #include -#include #include #include "gui-preferences-cs.h" @@ -61,6 +60,7 @@ #include "localcharset-wrapper.h" #include "oct-env.h" +#include "oct-string.h" #include "defaults.h" @@ -636,35 +636,20 @@ sys::env::putenv ("HTTPS_PROXY", proxy_url_str); } -// get a list of all available encodings -void gui_settings::get_codecs (QStringList *codecs) -{ - // get the codec name for each mib - QList all_mibs = QTextCodec::availableMibs (); - for (auto mib : all_mibs) - { - QTextCodec *c = QTextCodec::codecForMib (mib); - codecs->append (c->name ().toUpper ()); - } - - // Append SYSTEM - codecs->append (QString ("SYSTEM (") + - QString (octave_locale_charset_wrapper ()).toUpper () + - QString (")")); - - // Clean up and sort list of codecs - codecs->removeDuplicates (); - std::sort (codecs->begin (), codecs->end ()); -} - // initialize a given combo box with available text encodings void gui_settings::combo_encoding (QComboBox *combo, const QString& current) { - QStringList all_codecs; - get_codecs (&all_codecs); + std::vector encoding_list {string::get_encoding_list ()}; + + // prepend SYSTEM + std::string locale_charset {octave_locale_charset_wrapper ()}; + std::transform (locale_charset.begin (), locale_charset.end (), + locale_charset.begin (), ::toupper); + locale_charset = "SYSTEM (" + locale_charset + ")"; + encoding_list.insert (encoding_list.begin (), locale_charset); // get the value from the settings file if no current encoding is given - QString enc = current; + QString enc {current}; // Check for valid codec for the default. If this fails, "SYSTEM" (i.e. // locale_charset) will be chosen. 
@@ -674,12 +659,12 @@ bool show_system = false; if (ed_default_enc.def ().toString ().startsWith ("SYSTEM")) show_system = true; - else if (QTextCodec::codecForName (ed_default_enc.def ().toString ().toLatin1 ())) + else if (std::find (encoding_list.begin (), encoding_list.end (), + ed_default_enc.def ().toString ().toStdString ()) + != encoding_list.end ()) default_exists = true; - QString default_enc = - QString ("SYSTEM (") + - QString (octave_locale_charset_wrapper ()).toUpper () + QString (")"); + QString default_enc = QString::fromStdString (locale_charset); if (enc.isEmpty ()) { @@ -695,8 +680,12 @@ } // fill the combo box - for (const auto& c : all_codecs) - combo->addItem (c); + for (const auto& c : encoding_list) + combo->addItem (QString::fromStdString (c)); + + // prepend current encoding if not in list + if (combo->findText (enc, Qt::MatchExactly) < 0) + combo->insertItem (0, enc); // prepend the default item combo->insertSeparator (0); @@ -705,12 +694,8 @@ else combo->insertItem (0, ed_default_enc.def ().toString ()); - // select the default or the current one - int idx = combo->findText (enc, Qt::MatchExactly); - if (idx >= 0) - combo->setCurrentIndex (idx); - else - combo->setCurrentIndex (0); + // select the current encoding + combo->setCurrentIndex (combo->findText (enc, Qt::MatchExactly)); combo->setMaxVisibleItems (12); } diff -r 61db3c9377fb -r 1c99c8f020f7 libgui/src/gui-settings.h --- a/libgui/src/gui-settings.h Fri Apr 21 14:49:23 2023 -0400 +++ b/libgui/src/gui-settings.h Sat Apr 22 19:01:35 2023 +0200 @@ -203,8 +203,6 @@ void update_network_settings (); - void get_codecs (QStringList *codecs); - void combo_encoding (QComboBox *combo, const QString& current = QString ()); void reload (); diff -r 61db3c9377fb -r 1c99c8f020f7 libgui/src/qt-interpreter-events.cc --- a/libgui/src/qt-interpreter-events.cc Fri Apr 21 14:49:23 2023 -0400 +++ b/libgui/src/qt-interpreter-events.cc Sat Apr 22 19:01:35 2023 +0200 @@ -718,36 +718,15 @@ QString 
adjusted_value = value; - // Not all encodings are available. Encodings are uppercase and do - // not use CPxxx but IBMxxx or WINDOWS-xxx. - if (key == ed_default_enc.settings_key ()) { adjusted_value = adjusted_value.toUpper (); - gui_settings settings; - QStringList codecs; - settings.get_codecs (&codecs); - - QRegularExpression re {"^CP(\\d+)$"}; - QRegularExpressionMatch match = re.match (adjusted_value); - if (adjusted_value == "SYSTEM") adjusted_value = QString ("SYSTEM (") + QString (octave_locale_charset_wrapper ()).toUpper () + QString (")"); - else if (match.hasMatch ()) - { - if (codecs.contains ("IBM" + match.captured (1))) - adjusted_value = "IBM" + match.captured (1); - else if (codecs.contains ("WINDOWS-" + match.captured (1))) - adjusted_value = "WINDOWS-" + match.captured (1); - else - adjusted_value.clear (); - } - else if (! codecs.contains (adjusted_value)) - adjusted_value.clear (); } return adjusted_value; diff -r 61db3c9377fb -r 1c99c8f020f7 libgui/src/settings-dialog.cc --- a/libgui/src/settings-dialog.cc Fri Apr 21 14:49:23 2023 -0400 +++ b/libgui/src/settings-dialog.cc Sat Apr 22 19:01:35 2023 +0200 @@ -39,7 +39,6 @@ #include #include #include -#include #include #include diff -r 61db3c9377fb -r 1c99c8f020f7 liboctave/util/oct-string.cc --- a/liboctave/util/oct-string.cc Fri Apr 21 14:49:23 2023 -0400 +++ b/liboctave/util/oct-string.cc Sat Apr 22 19:01:35 2023 +0200 @@ -34,8 +34,10 @@ #include #include #include +#include #include "Array.h" +#include "iconv-wrappers.h" #include "lo-ieee.h" #include "lo-mappers.h" #include "uniconv-wrappers.h" @@ -639,6 +641,142 @@ return retval; } +std::vector +octave::string::get_encoding_list () +{ + static std::vector encoding_list; + + if (encoding_list.empty ()) + { +#if defined (HAVE_ICONVLIST) + // get number of supported encodings + std::size_t count = 0; + octave_iconvlist_wrapper ( + [] (unsigned int num, const char * const *, void *data) -> int + { + std::size_t *count_ptr = static_cast (data); + 
*count_ptr = num; + return 0; + }, + &count); + + if (count == static_cast(-1)) + { + encoding_list.push_back ("UTF-8"); + return encoding_list; + } + +# if defined (HAVE_ICONV_CANONICALIZE) + // use unordered_set to skip canonicalized aliases + std::unordered_set encoding_set; + encoding_set.reserve (count); + + // populate vector with name of encodings + octave_iconvlist_wrapper ( + [] (unsigned int num, const char * const *names, void *data) -> int + { + std::unordered_set *encoding_set_ptr + = static_cast *> (data); + for (std::size_t i = 0; i < num; i++) + { + const char *canonicalized_enc + = octave_iconv_canonicalize_wrapper (names[i]); + encoding_set_ptr->insert (canonicalized_enc); + } + return 0; + }, + &encoding_set); + + encoding_list.assign (encoding_set.begin (), encoding_set.end ()); +# endif + +#else + // Use hardcoded list of encodings as a fallback for platforms without + // iconvlist (or another way of programmatically querying a list of + // supported encodings). + // This list is inspired by the encodings supported by Geany. + encoding_list + = {"ISO-8859-1", + "ISO-8859-2", + "ISO-8859-3", + "ISO-8859-4", + "ISO-8859-5", + "ISO-8859-6", + "ISO-8859-7", + "ISO-8859-8", + "ISO-8859-9", + "ISO-8859-10", + "ISO-8859-13", + "ISO-8859-14", + "ISO-8859-15", + "ISO-8859-16", + + "UTF-7", + "UTF-8", + "UTF-16LE", + "UTF-16BE", + "UTF-32LE", + "UTF-32BE", + "UCS-2LE", + "UCS-2BE", + + "ARMSCII-8", + "BIG5", + "BIG5-HKSCS", + "CP866", + + "EUC-JP", + "EUC-KR", + "EUC-TW", + + "GB18030", + "GB_2312-80", + "GBK", + "HZ", + + "IBM850", + "IBM852", + "IBM855", + "IBM857", + "IBM862", + "IBM864", + + "ISO-2022-JP", + "ISO-2022-KR", + "JOHAB", + "KOI8-R", + "KOI8-U", + + "SHIFT_JIS", + "TCVN", + "TIS-620", + "UHC", + "VISCII", + + "CP1250", + "CP1251", + "CP1252", + "CP1253", + "CP1254", + "CP1255", + "CP1256", + "CP1257", + "CP1258", + + "CP932" + }; + + // FIXME: Should we check whether those are actually valid encoding + // identifiers? 
+#endif + + // sort list of encodings + std::sort (encoding_list.begin (), encoding_list.end ()); + } + + return encoding_list; +} + typedef octave::string::codecvt_u8::InternT InternT; typedef octave::string::codecvt_u8::ExternT ExternT; typedef octave::string::codecvt_u8::StateT StateT; diff -r 61db3c9377fb -r 1c99c8f020f7 liboctave/util/oct-string.h --- a/liboctave/util/oct-string.h Fri Apr 21 14:49:23 2023 -0400 +++ b/liboctave/util/oct-string.h Sat Apr 22 19:01:35 2023 +0200 @@ -29,6 +29,7 @@ #include "octave-config.h" #include +#include #include "oct-cmplx.h" @@ -162,6 +163,9 @@ u16_to_encoding (const std::string& who, const std::u16string& u16_string, const std::string& encoding); +extern OCTAVE_API std::vector +get_encoding_list (); + template struct deletable_facet : Facet { diff -r 61db3c9377fb -r 1c99c8f020f7 liboctave/wrappers/iconv-wrappers.c --- a/liboctave/wrappers/iconv-wrappers.c Fri Apr 21 14:49:23 2023 -0400 +++ b/liboctave/wrappers/iconv-wrappers.c Sat Apr 22 19:01:35 2023 +0200 @@ -42,3 +42,26 @@ { return iconv_close ((iconv_t) cd); } + +void +octave_iconvlist_wrapper (int (*do_one) (unsigned int namescount, + const char * const *names, + void *data), + void *data) +{ +#if defined (HAVE_ICONVLIST) + iconvlist (do_one, data); +#endif + + return; +} + +const char * +octave_iconv_canonicalize_wrapper (const char *name) +{ +#if defined (HAVE_ICONV_CANONICALIZE) + return iconv_canonicalize (name); +#else + return name; +#endif +} diff -r 61db3c9377fb -r 1c99c8f020f7 liboctave/wrappers/iconv-wrappers.h --- a/liboctave/wrappers/iconv-wrappers.h Fri Apr 21 14:49:23 2023 -0400 +++ b/liboctave/wrappers/iconv-wrappers.h Sat Apr 22 19:01:35 2023 +0200 @@ -35,6 +35,15 @@ extern OCTAVE_API int octave_iconv_close_wrapper (void *cd); +extern OCTAVE_API void +octave_iconvlist_wrapper (int (*do_one) (unsigned int namescount, + const char * const *names, + void *data), + void *data); + +extern OCTAVE_API const char * +octave_iconv_canonicalize_wrapper (const char 
*name); + #if defined __cplusplus } #endif diff -r 61db3c9377fb -r 1c99c8f020f7 m4/acinclude.m4 --- a/m4/acinclude.m4 Fri Apr 21 14:49:23 2023 -0400 +++ b/m4/acinclude.m4 Sat Apr 22 19:01:35 2023 +0200 @@ -1256,6 +1256,68 @@ fi ]) dnl +dnl Check whether iconv provides the function iconvlist. +dnl +AC_DEFUN([OCTAVE_CHECK_ICONVLIST], [ + AC_CACHE_CHECK([whether the function iconvlist is available], + [octave_cv_iconvlist], + [ac_octave_save_LIBS="$LIBS" + LIBS="$LIBICONV $LIBS" + AC_LANG_PUSH(C++) + AC_LINK_IFELSE([AC_LANG_PROGRAM([[ + #if HAVE_ICONV + extern "C" + { + # include + } + #endif + ]], [[ + iconvlist ( + [] (unsigned int, const char * const *, void *) -> int + { + return 0; + }, + nullptr); + ]])], + octave_cv_iconvlist=yes, + octave_cv_iconvlist=no) + AC_LANG_POP(C++) + LIBS="$ac_octave_save_LIBS" + ]) + if test $octave_cv_iconvlist = yes; then + AC_DEFINE(HAVE_ICONVLIST, 1, [Define to 1 if iconvlist is available.]) + fi +]) +dnl +dnl Check whether iconv provides the function iconv_canonicalize. +dnl +AC_DEFUN([OCTAVE_CHECK_ICONV_CANONICALIZE], [ + AC_CACHE_CHECK([whether the function iconv_canonicalize is available], + [octave_cv_iconv_canonicalize], + [ac_octave_save_LIBS="$LIBS" + LIBS="$LIBICONV $LIBS" + AC_LANG_PUSH(C++) + AC_LINK_IFELSE([AC_LANG_PROGRAM([[ + #if HAVE_ICONV + extern "C" + { + # include + } + #endif + ]], [[ + iconv_canonicalize ("UTF-8"); + ]])], + octave_cv_iconv_canonicalize=yes, + octave_cv_iconv_canonicalize=no) + AC_LANG_POP(C++) + LIBS="$ac_octave_save_LIBS" + ]) + if test $octave_cv_iconv_canonicalize = yes; then + AC_DEFINE(HAVE_ICONV_CANONICALIZE, 1, + [Define to 1 if iconv_canonicalize is available.]) + fi +]) +dnl dnl Check whether using HDF5 DLL under Windows. This is done by dnl testing for a data symbol in the HDF5 library, which would dnl require the definition of _HDF5USEDL_ under MSVC compiler.