Mercurial > octave
changeset 25361:82445187633e
Add support for arbitrary character encodings in m-files (bug #53842).
* input.cc: Add new variable "Vmfile_encoding". Convert from local encoding to
UTF-8 in file_reader::get_input. Add new function "__mfile_encoding__".
* iconv-wrappers.[c/h]: Add new wrapper files for iconv_open and iconv_close.
* wrappers/module.mk: Add new files.
author | Markus Mützel <markus.muetzel@gmx.de> |
---|---|
date | Sat, 05 May 2018 20:13:18 +0200 |
parents | bc5f225bc578 |
children | def1b446ba64 |
files | libinterp/corefcn/input.cc liboctave/wrappers/iconv-wrappers.c liboctave/wrappers/iconv-wrappers.h liboctave/wrappers/module.mk |
diffstat | 4 files changed, 164 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
--- a/libinterp/corefcn/input.cc	Tue May 08 11:53:34 2018 -0400
+++ b/libinterp/corefcn/input.cc	Sat May 05 20:13:18 2018 +0200
@@ -37,8 +37,11 @@
 
 #include "cmd-edit.h"
 #include "file-ops.h"
+#include "iconv-wrappers.h"
+#include "localcharset-wrapper.h"
 #include "quit.h"
 #include "str-vec.h"
+#include "uniconv-wrappers.h"
 
 #include "bp-table.h"
 #include "builtin-defun-decls.h"
@@ -110,6 +113,13 @@
 
 static hook_function_list input_event_hook_functions;
 
+// Codepage which is used to read .m files
+#if defined (OCTAVE_USE_WINDOWS_API)
+static std::string Vmfile_encoding = "system";
+#else
+static std::string Vmfile_encoding = "utf-8";
+#endif
+
 // For octave_quit.
 void
 remove_input_event_hook_functions (void)
@@ -777,7 +787,34 @@
 
     eof = false;
 
-    return octave_fgets (m_file, eof);
+    std::string src_str = octave_fgets (m_file, eof);
+    std::string encoding = Vmfile_encoding.compare ("system") == 0
+                           ? octave_locale_charset_wrapper ()
+                           : Vmfile_encoding;
+
+    if (encoding.compare ("utf-8") != 0)
+      {
+        // convert encoding to UTF-8 before returning string
+        const char *src = src_str.c_str ();
+        size_t srclen = src_str.length ();
+
+        size_t length;
+        uint8_t *utf8_str = nullptr;
+
+        utf8_str = octave_u8_conv_from_encoding (encoding.c_str (), src, srclen,
+                                                 &length);
+
+        if (! utf8_str)
+          error ("file_reader::get_input: converting from codepage '%s' to UTF-8: %s",
+                 encoding.c_str (), std::strerror (errno));
+
+        octave::unwind_protect frame;
+        frame.add_fcn (::free, static_cast<void *> (utf8_str));
+
+        src_str = std::string (reinterpret_cast<char *> (utf8_str), length);
+      }
+
+    return src_str;
   }
 
   const std::string eval_string_reader::s_in_src ("eval_string");
@@ -1386,3 +1423,46 @@
 
   return retval;
 }
+
+DEFUN (__mfile_encoding__, args, ,
+       doc: /* -*- texinfo -*-
+@deftypefn {} {@var{current_encoding} =} __mfile_encoding__ (@var{new_encoding})
+Set and query the codepage that is used for reading .m files.
+@end deftypefn */)
+{
+  int nargin = args.length ();
+
+  if (nargin > 1)
+    print_usage ();
+
+  if (nargin > 0)
+    {
+      std::string str = args(0).xstring_value (
+          "__mfile_encoding__: NEW_ENCODING must be a string designating a valid codepage.");
+      if (str.empty ())
+#if defined (OCTAVE_USE_WINDOWS_API)
+        Vmfile_encoding = "system";
+#else
+        Vmfile_encoding = "utf-8";
+#endif
+      else
+        {
+          std::transform (str.begin (), str.end (), str.begin (), ::tolower);
+
+          std::string codepage = (str.compare ("system") == 0)
+                                 ? octave_locale_charset_wrapper () : str;
+
+          // Probe iconv for the codepage; failure is signalled by (iconv_t) -1, not by a stale errno.
+          void *codec = octave_iconv_open_wrapper (codepage.c_str (), "utf-8");
+
+          if (codec == reinterpret_cast<void *> (-1))
+            error ("__mfile_encoding__: Conversion from codepage '%s' not supported",
+                   codepage.c_str ());
+
+          octave_iconv_close_wrapper (codec);
+
+          Vmfile_encoding = str;
+        }
+    }
+  return ovl (Vmfile_encoding);
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/liboctave/wrappers/iconv-wrappers.c	Sat May 05 20:13:18 2018 +0200
@@ -0,0 +1,41 @@
+/*
+
+Copyright (C) 2018 Markus Mützel
+
+This file is part of Octave.
+
+Octave is free software: you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+Octave is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Octave; see the file COPYING.  If not, see
+<https://www.gnu.org/licenses/>.
+
+*/
+
+#if defined (HAVE_CONFIG_H)
+#  include "config.h"
+#endif
+
+#include "iconv.h"
+
+#include "iconv-wrappers.h"
+/* Signatures use void * to match iconv-wrappers.h; iconv_open yields (iconv_t) -1 on failure.  */
+void *
+octave_iconv_open_wrapper (const char *tocode, const char *fromcode)
+{
+  return (void *) iconv_open (tocode, fromcode);
+}
+/* CD is a descriptor previously returned by octave_iconv_open_wrapper.  */
+int
+octave_iconv_close_wrapper (void *cd)
+{
+  return iconv_close ((iconv_t) cd);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/liboctave/wrappers/iconv-wrappers.h	Sat May 05 20:13:18 2018 +0200
@@ -0,0 +1,40 @@
+/*
+
+Copyright (C) 2018 Markus Mützel
+
+This file is part of Octave.
+
+Octave is free software: you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+Octave is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Octave; see the file COPYING.  If not, see
+<https://www.gnu.org/licenses/>.
+
+*/
+
+#if ! defined (octave_iconv_wrappers_h)
+#define octave_iconv_wrappers_h 1
+
+#if defined __cplusplus
+extern "C" {
+#endif
+/* Forwards to iconv_open (TOCODE, FROMCODE); the descriptor is handed back as an opaque pointer.  */
+extern void *
+octave_iconv_open_wrapper (const char *tocode, const char *fromcode);
+/* Forwards CD, a descriptor from octave_iconv_open_wrapper, to iconv_close.  */
+extern int
+octave_iconv_close_wrapper (void *cd);
+
+#if defined __cplusplus
+}
+#endif
+
+#endif
--- a/liboctave/wrappers/module.mk Tue May 08 11:53:34 2018 -0400 +++ b/liboctave/wrappers/module.mk Sat May 05 20:13:18 2018 +0200 @@ -11,6 +11,7 @@ %reldir%/getopt-wrapper.h \ %reldir%/glob-wrappers.h \ %reldir%/hash-wrappers.h \ + %reldir%/iconv-wrappers.h \ %reldir%/localcharset-wrapper.h \ %reldir%/math-wrappers.h \ %reldir%/mkostemp-wrapper.h \ @@ -48,6 +49,7 @@ %reldir%/getopt-wrapper.c \ %reldir%/glob-wrappers.c \ %reldir%/hash-wrappers.c \ + %reldir%/iconv-wrappers.c \ %reldir%/localcharset-wrapper.c \ %reldir%/math-wrappers.c \ %reldir%/mkostemp-wrapper.c \