Mercurial > octave
changeset 32021:da2954782945
gui: Use iconv/gnulib to save editor content in non-UTF-8 encodings.
* bootstrap.conf: Add module "uniconv/u16-conv-to-enc".
* liboctave/wrappers/uniconv-wrappers.c, liboctave/wrappers/uniconv-wrappers.h
(octave_u16_conv_to_encoding, octave_u16_conv_to_encoding_strict): Add wrappers
for the gnulib function u16_conv_to_encoding.
* liboctave/util/oct-string.cc, liboctave/util/oct-string.h
(octave::string::u16_to_encoding): Add new function.
* libgui/src/m-editor/file-editor-tab.h
(octave::file_editor_tab::check_valid_codec): Change return type of function.
* libgui/src/m-editor/file-editor-tab.cc
(octave::file_editor_tab::do_save_file): Use iconv/gnulib functions to save
file editor content in non-UTF-8 encodings. The interpreter needs to be able to
read these files using the iconv/gnulib functions anyway, and Qt6 can only
convert to UTF-8, UTF-16, UTF-32 or the system locale encoding.
(octave::file_editor_tab::check_valid_codec): Use iconv/gnulib facilities to
check if selected encoding is valid for file editor content.
(octave::file_editor_tab::save_file): Adjust for changed return type of
check_valid_codec.
author | Markus Mützel <markus.muetzel@gmx.de> |
---|---|
date | Sat, 15 Apr 2023 11:29:15 +0200 |
parents | a5413268eb47 |
children | 5945e1bd73ea |
files | bootstrap.conf libgui/src/m-editor/file-editor-tab.cc libgui/src/m-editor/file-editor-tab.h liboctave/util/oct-string.cc liboctave/util/oct-string.h liboctave/wrappers/uniconv-wrappers.c liboctave/wrappers/uniconv-wrappers.h |
diffstat | 7 files changed, 162 insertions(+), 68 deletions(-) [+] |
line wrap: on
line diff
--- a/bootstrap.conf Fri Apr 14 21:24:06 2023 -0400 +++ b/bootstrap.conf Sat Apr 15 11:29:15 2023 +0200 @@ -105,6 +105,7 @@ uname unicase/u8-tolower unicase/u8-toupper + uniconv/u16-conv-to-enc uniconv/u32-conv-to-enc uniconv/u8-conv-from-enc uniconv/u8-conv-to-enc
--- a/libgui/src/m-editor/file-editor-tab.cc Fri Apr 14 21:24:06 2023 -0400 +++ b/libgui/src/m-editor/file-editor-tab.cc Sat Apr 15 11:29:15 2023 +0200 @@ -80,6 +80,7 @@ #include "cmd-edit.h" #include "file-ops.h" +#include "iconv-wrappers.h" #include "localcharset-wrapper.h" #include "uniconv-wrappers.h" @@ -2182,9 +2183,8 @@ m_encoding = m_new_encoding; // consider a possible new encoding - // set the desired codec (if suitable for contents) - QTextCodec *codec = check_valid_codec (); - if (! codec) + // check if the selected encoding is suitable for the content + if (! check_valid_codec ()) return; // No valid codec // Get a list of breakpoint line numbers, before exiting debug mode @@ -2346,31 +2346,72 @@ return; } - // save the contents into the file - - // write the file - QTextStream out (&file); + // target encoding + std::string encoding = m_encoding.toStdString (); + if (encoding.compare (0, 6, "SYSTEM") == 0) + encoding = octave_locale_charset_wrapper (); + + // check if selected encoding is suitable for contents + if (! check_valid_codec ()) + { + // begin watching file again if it was being watched previously + if (trackedFiles.contains (file_to_save)) + m_file_system_watcher.addPath (file_to_save); + + return; // no valid codec + } + + // save contents of editor in file + + QApplication::setOverrideCursor (Qt::WaitCursor); + + if (encoding == "utf-8" || encoding == "UTF-8") + { + // use Qt encoding conversion for UTF-8 + QTextStream out (&file); #if HAVE_QTEXTSTREAM_SETENCODING - // FIXME: Check and set encoding! + out.setEncoding (QStringConverter::Utf8); #else - // set the desired codec (if suitable for contents) - QTextCodec *codec = check_valid_codec (); - if (! 
codec) - return; // No valid codec - - // Save the file - out.setCodec (codec); + out.setCodec ("UTF-8"); #endif - QApplication::setOverrideCursor (Qt::WaitCursor); - - out << m_edit_area->text (); - if (settings.bool_value (ed_force_newline) - && m_edit_area->text ().length ()) - out << m_edit_area->eol_string (); // Add newline if desired - - out.flush (); + out << m_edit_area->text (); + + // add newline if desired + if (settings.bool_value (ed_force_newline) + && m_edit_area->text ().length ()) + out << m_edit_area->eol_string (); + + out.flush (); + } + else + { + // use iconv/gnulib for all other output encodings + QDataStream out (&file); + + // get natively UTF-16 encoded content of the QString as STL type + std::u16string u16_string = m_edit_area->text ().toStdU16String (); + + // convert to output encoding + std::string native_string + = string::u16_to_encoding ("file editor", u16_string, encoding); + + // save file + out.writeRawData (native_string.c_str (), native_string.size ()); + + // add newline if desired + if (settings.bool_value (ed_force_newline) + && m_edit_area->text ().length ()) + { + std::u16string u16_newline + = m_edit_area->eol_string ().toStdU16String (); + std::string newline + = string::u16_to_encoding ("file editor", u16_newline, encoding); + out.writeRawData (newline.c_str (), newline.size ()); + } + } + QApplication::restoreOverrideCursor (); // Finish writing by committing the changes to disk, @@ -2550,50 +2591,45 @@ return false; } -QTextCodec* file_editor_tab::check_valid_codec () +bool file_editor_tab::check_valid_codec () { - QTextCodec *codec = QTextCodec::codecForName (m_encoding.toLatin1 ()); - - // "SYSTEM" is used as alias for the locale encoding. - if ((! codec) && m_encoding.startsWith("SYSTEM")) - codec = QTextCodec::codecForLocale (); - - if (! 
codec) - { - QMessageBox::critical (nullptr, - tr ("Octave Editor"), - tr ("The current encoding %1\n" - "can not be applied.\n\n" - "Please select another one!").arg (m_encoding)); - - return nullptr; - } - QString editor_text = m_edit_area->text (); - bool can_encode = codec->canEncode (editor_text); - - // We cannot rely on QTextCodec::canEncode because it uses the - // ConverterState of convertFromUnicode which isn't updated by some - // implementations. - if (can_encode) + + // target encoding + std::string encoding = m_encoding.toStdString (); + if (encoding.compare (0, 6, "SYSTEM") == 0) + encoding = octave_locale_charset_wrapper (); + + if (encoding == "UTF-8" || encoding == "utf-8") + return true; + + // check if encoding is valid + void *codec = octave_iconv_open_wrapper (encoding.c_str (), "utf-8"); + if (codec == reinterpret_cast<void *> (-1)) { - QVector<uint> u32_str = editor_text.toUcs4 (); - const uint32_t *src = reinterpret_cast<const uint32_t *> - (u32_str.data ()); - - std::size_t length; - const std::string encoding = m_encoding.toStdString (); - char *res_str = - octave_u32_conv_to_encoding_strict (encoding.c_str (), src, - u32_str.size (), &length); - if (! res_str) - { - if (errno == EILSEQ) - can_encode = false; - } - else - ::free (static_cast<void *> (res_str)); + if (errno == EINVAL) + return false; } + else + octave_iconv_close_wrapper (codec); + + // check if all characters in the editor can be encoded in the target encoding + bool can_encode = true; + std::u16string u16_str = editor_text.toStdU16String (); + const uint16_t *src = reinterpret_cast<const uint16_t *> + (u16_str.c_str ()); + + std::size_t length; + char *res_str = + octave_u16_conv_to_encoding_strict (encoding.c_str (), src, + u16_str.size (), &length); + if (! res_str) + { + if (errno == EILSEQ) + can_encode = false; + } + else + ::free (static_cast<void *> (res_str)); if (! 
can_encode) { @@ -2608,12 +2644,10 @@ QMessageBox::Cancel); if (pressed_button == QMessageBox::Ignore) - return codec; - else - return nullptr; + can_encode = true; } - return codec; + return can_encode; } void file_editor_tab::handle_save_file_as_answer (const QString& save_file_name)
--- a/libgui/src/m-editor/file-editor-tab.h Fri Apr 14 21:24:06 2023 -0400 +++ b/libgui/src/m-editor/file-editor-tab.h Sat Apr 15 11:29:15 2023 +0200 @@ -266,7 +266,7 @@ bool restore_breakpoints = true); void save_file_as (bool remove_on_success = false); bool check_valid_identifier (QString file_name); - QTextCodec * check_valid_codec (); + bool check_valid_codec (); bool unchanged_or_saved ();
--- a/liboctave/util/oct-string.cc Fri Apr 14 21:24:06 2023 -0400 +++ b/liboctave/util/oct-string.cc Sat Apr 15 11:29:15 2023 +0200 @@ -607,6 +607,38 @@ return num_replacements; } +std::string +octave::string::u16_to_encoding (const std::string& who, + const std::u16string& u16_string, + const std::string& encoding) +{ + const uint16_t *src = reinterpret_cast<const uint16_t *> + (u16_string.c_str ()); + std::size_t srclen = u16_string.length (); + + std::size_t length; + char *native_str = octave_u16_conv_to_encoding (encoding.c_str (), src, + srclen, &length); + + if (! native_str) + { + if (errno == ENOSYS) + (*current_liboctave_error_handler) + ("%s: iconv() is not supported. Installing GNU libiconv and then " + "re-compiling Octave could fix this.", who.c_str ()); + else + (*current_liboctave_error_handler) + ("%s: converting from UTF-16 to codepage '%s' failed: %s", + who.c_str (), encoding.c_str (), std::strerror (errno)); + } + + octave::unwind_action free_native_str ([=] () { ::free (native_str); }); + + std::string retval = std::string (native_str, length); + + return retval; +} + typedef octave::string::codecvt_u8::InternT InternT; typedef octave::string::codecvt_u8::ExternT ExternT; typedef octave::string::codecvt_u8::StateT StateT;
--- a/liboctave/util/oct-string.h Fri Apr 14 21:24:06 2023 -0400 +++ b/liboctave/util/oct-string.h Sat Apr 15 11:29:15 2023 +0200 @@ -158,6 +158,9 @@ u8_validate (const std::string& who, std::string& in_string, const u8_fallback_type type = U8_REPLACEMENT_CHAR); +extern OCTAVE_API std::string +u16_to_encoding (const std::string& who, const std::u16string& u16_string, + const std::string& encoding); template<class Facet> struct deletable_facet : Facet
--- a/liboctave/wrappers/uniconv-wrappers.c Fri Apr 14 21:24:06 2023 -0400 +++ b/liboctave/wrappers/uniconv-wrappers.c Sat Apr 15 11:29:15 2023 +0200 @@ -93,6 +93,22 @@ } char * +octave_u16_conv_to_encoding (const char *tocode, const uint16_t *src, + size_t srclen, size_t *lengthp) +{ + return u16_conv_to_encoding (tocode, iconveh_question_mark, + src, srclen, NULL, NULL, lengthp); +} + +char * +octave_u16_conv_to_encoding_strict (const char *tocode, const uint16_t *src, + size_t srclen, size_t *lengthp) +{ + return u16_conv_to_encoding (tocode, iconveh_error, + src, srclen, NULL, NULL, lengthp); +} + +char * octave_u32_conv_to_encoding_strict (const char *tocode, const uint32_t *src, size_t srclen, size_t *lengthp) {
--- a/liboctave/wrappers/uniconv-wrappers.h Fri Apr 14 21:24:06 2023 -0400 +++ b/liboctave/wrappers/uniconv-wrappers.h Sat Apr 15 11:29:15 2023 +0200 @@ -54,6 +54,14 @@ size_t srclen, size_t *lengthp); extern OCTAVE_API char * +octave_u16_conv_to_encoding (const char *tocode, const uint16_t *src, + size_t srclen, size_t *lengthp); + +extern OCTAVE_API char * +octave_u16_conv_to_encoding_strict (const char *tocode, const uint16_t *src, + size_t srclen, size_t *lengthp); + +extern OCTAVE_API char * octave_u32_conv_to_encoding_strict (const char *tocode, const uint32_t *src, size_t srclen, size_t *lengthp);