Mercurial > octave
diff libinterp/corefcn/input.cc @ 25361:82445187633e
Add support for arbitrary character encodings in m-files (bug #53842).
* input.cc: Add new variable "Vmfile_encoding". Convert from local encoding to
UTF-8 in file_reader::get_input. Add new function "__mfile_encoding__".
* iconv-wrappers.[c/h]: Add new wrapper files for iconv_open and iconv_close.
* wrappers/module.mk: Add new files.
| | |
|---|---|
| author | Markus Mützel <markus.muetzel@gmx.de> |
| date | Sat, 05 May 2018 20:13:18 +0200 |
| parents | 6652d3823428 |
| children | 9cc1ca6538e3 |
line wrap: on
line diff
--- a/libinterp/corefcn/input.cc Tue May 08 11:53:34 2018 -0400 +++ b/libinterp/corefcn/input.cc Sat May 05 20:13:18 2018 +0200 @@ -37,8 +37,11 @@ #include "cmd-edit.h" #include "file-ops.h" +#include "iconv-wrappers.h" +#include "localcharset-wrapper.h" #include "quit.h" #include "str-vec.h" +#include "uniconv-wrappers.h" #include "bp-table.h" #include "builtin-defun-decls.h" @@ -110,6 +113,13 @@ static hook_function_list input_event_hook_functions; +// Codepage which is used to read .m files +#if defined (OCTAVE_USE_WINDOWS_API) +static std::string Vmfile_encoding = "system"; +#else +static std::string Vmfile_encoding = "utf-8"; +#endif + // For octave_quit. void remove_input_event_hook_functions (void) @@ -777,7 +787,34 @@ eof = false; - return octave_fgets (m_file, eof); + std::string src_str = octave_fgets (m_file, eof); + std::string encoding = Vmfile_encoding.compare ("system") == 0 + ? octave_locale_charset_wrapper () + : Vmfile_encoding; + + if (encoding.compare ("utf-8") != 0) + { + // convert encoding to UTF-8 before returning string + const char *src = src_str.c_str (); + size_t srclen = src_str.length (); + + size_t length; + uint8_t *utf8_str = nullptr; + + utf8_str = octave_u8_conv_from_encoding (encoding.c_str (), src, srclen, + &length); + + if (! utf8_str) + error ("file_reader::get_input: converting from codepage '%s' to UTF-8: %s", + encoding.c_str (), std::strerror (errno)); + + octave::unwind_protect frame; + frame.add_fcn (::free, static_cast<void *> (utf8_str)); + + src_str = std::string (reinterpret_cast<char *> (utf8_str), length); + } + + return src_str; } const std::string eval_string_reader::s_in_src ("eval_string"); @@ -1386,3 +1423,46 @@ return retval; } + +DEFUN (__mfile_encoding__, args, , + doc: /* -*- texinfo -*- +@deftypefn {} {@var{current_encoding} =} __mfile_encoding__ (@var{new_encoding}) +Set and query the codepage that is used for reading .m files. 
+@end deftypefn */) +{ + int nargin = args.length (); + + if (nargin > 1) + print_usage (); + + if (nargin > 0) + { + std::string str = args(0).xstring_value ( + "__mfile_encoding__: NEW_ENCODING must be a string designating a valid codepage."); + if (str.empty ()) +#if defined (OCTAVE_USE_WINDOWS_API) + Vmfile_encoding = "system"; +#else + Vmfile_encoding = "utf-8"; +#endif + else + { + std::transform (str.begin (), str.end (), str.begin (), ::tolower); + + std::string codepage = (str.compare ("system") == 0) + ? octave_locale_charset_wrapper () : str; + + // check if valid codepage + void *codec = octave_iconv_open_wrapper (codepage.c_str (), "utf-8"); + + if (errno == EINVAL) + error ("__mfile_encoding__: Conversion from codepage '%s' not supported", + codepage.c_str ()); + + octave_iconv_close_wrapper (codec); + + Vmfile_encoding = str; + } + } + return ovl (Vmfile_encoding); +} \ No newline at end of file