Mercurial > octave
changeset 26705:c13143821eef
Add front-end support for encodings in "fopen" (bug #55452).
* file-io.cc (Ffopen): Add support for input and output argument "encoding".
(do_stream_open): Check if encoding is valid.
* oct-stream.cc (stream_list::get_info): Return encoding.
* io.tst (fopen): Adapt for new syntax. Add test for writing and reading with
specified encoding.
author | Markus Mützel <markus.muetzel@gmx.de> |
---|---|
date | Sat, 09 Feb 2019 17:41:48 +0100 |
parents | 7c027116ee21 |
children | ccea3574f36b |
files | libinterp/corefcn/file-io.cc libinterp/corefcn/oct-stream.cc test/io.tst |
diffstat | 3 files changed, 75 insertions(+), 30 deletions(-) [+] |
line wrap: on
line diff
--- a/libinterp/corefcn/file-io.cc Sat Feb 09 09:44:28 2019 -0800 +++ b/libinterp/corefcn/file-io.cc Sat Feb 09 17:41:48 2019 +0100 @@ -52,6 +52,7 @@ #include "file-ops.h" #include "file-stat.h" +#include "iconv-wrappers.h" #include "lo-ieee.h" #include "lo-sysdep.h" #include "mkostemp-wrapper.h" @@ -405,12 +406,24 @@ static octave::stream do_stream_open (const std::string& name, const std::string& mode_arg, - const std::string& arch, int& fid) + const std::string& arch, std::string encoding, int& fid) { octave::stream retval; fid = -1; + // Valid names for encodings consist of ASCII characters only. + std::transform (encoding.begin (), encoding.end (), encoding.begin (), + ::tolower); + if (encoding.compare ("utf-8")) + { + // check if encoding is valid + octave_iconv_open_wrapper (encoding.c_str (), "utf-8"); + if (errno == EINVAL) + error ("fopen: conversion from codepage '%s' not supported", + encoding.c_str ()); + } + std::string mode = mode_arg; bool use_zlib = false; normalize_fopen_mode (mode, use_zlib); @@ -440,8 +453,8 @@ gzFile gzf = ::gzdopen (fd, mode.c_str ()); - retval = octave_zstdiostream::create (fname, gzf, fd, - md, flt_fmt); + retval = octave_zstdiostream::create (fname, gzf, fd, md, + flt_fmt, encoding); } else retval.error (std::strerror (errno)); @@ -451,8 +464,8 @@ { FILE *fptr = octave::sys::fopen (fname.c_str (), mode.c_str ()); - retval = octave_stdiostream::create (fname, fptr, md, - flt_fmt); + retval = octave_stdiostream::create (fname, fptr, md, flt_fmt, + encoding); if (! fptr) retval.error (std::strerror (errno)); @@ -465,7 +478,8 @@ static octave::stream do_stream_open (const octave_value& tc_name, const octave_value& tc_mode, - const octave_value& tc_arch, const char *fcn, int& fid) + const octave_value& tc_arch, const octave_value& tc_encoding, + const char *fcn, int& fid) { octave::stream retval; @@ -474,8 +488,9 @@ std::string name = tc_name.xstring_value ("%s: filename must be a string", fcn); std::string mode = tc_mode.xstring_value ("%s: file mode must be a string", fcn); std::string arch = tc_arch.xstring_value ("%s: architecture type must be a string", fcn); - - retval = do_stream_open (name, mode, arch, fid); + std::string encoding = tc_encoding.xstring_value ("%s: ENCODING must be a string", fcn); + + retval = do_stream_open (name, mode, arch, encoding, fid); return retval; } @@ -485,19 +500,22 @@ @deftypefn {} {@var{fid} =} fopen (@var{name}) @deftypefnx {} {@var{fid} =} fopen (@var{name}, @var{mode}) @deftypefnx {} {@var{fid} =} fopen (@var{name}, @var{mode}, @var{arch}) +@deftypefnx {} {@var{fid} =} fopen (@var{name}, @var{mode}, @var{arch}, @var{encoding}) @deftypefnx {} {[@var{fid}, @var{msg}] =} fopen (@dots{}) @deftypefnx {} {@var{fid_list} =} fopen ("all") -@deftypefnx {} {[@var{file}, @var{mode}, @var{arch}] =} fopen (@var{fid}) +@deftypefnx {} {[@var{file}, @var{mode}, @var{arch}, @var{encoding}] =} fopen (@var{fid}) Open a file for low-level I/O or query open files and file descriptors. The first form of the @code{fopen} function opens the named file with -the specified mode (read-write, read-only, etc.@:) and architecture -interpretation (IEEE big endian, IEEE little endian, etc.), and returns -an integer value that may be used to refer to the file later. If an -error occurs, @var{fid} is set to @minus{}1 and @var{msg} contains the +the specified mode (read-write, read-only, etc.@:), architecture +interpretation (IEEE big endian, IEEE little endian, etc.) and file encoding, +and returns an integer value that may be used to refer to the file later. If +an error occurs, @var{fid} is set to @minus{}1 and @var{msg} contains the corresponding system error message. The @var{mode} is a one or two character string that specifies whether the file is to be opened for -reading, writing, or both. +reading, writing, or both. The @var{encoding} is a character string with a +valid code page identifier. This code page is used when strings are read from +or written to the file. The second form of the @code{fopen} function returns a vector of file ids corresponding to all the currently open files, excluding the @@ -571,10 +589,6 @@ IEEE little endian format. @end table -@noindent -However, conversions are currently only supported for @samp{native}, -@samp{ieee-be}, and @samp{ieee-le} formats. - When opening a new file that does not yet exist, permissions will be set to @code{0666 - @var{umask}}. @@ -590,7 +604,7 @@ { int nargin = args.length (); - if (nargin < 1 || nargin > 3) + if (nargin < 1 || nargin > 4) print_usage (); octave_value_list retval = ovl (-1.0); @@ -612,21 +626,22 @@ { string_vector tmp = streams.get_info (args(0)); - retval = ovl (tmp(0), tmp(1), tmp(2)); + retval = ovl (tmp(0), tmp(1), tmp(2), tmp(3)); return retval; } } - octave_value mode = (nargin == 2 || nargin == 3) - ? args(1) : octave_value ("r"); - - octave_value arch = (nargin == 3) - ? args(2) : octave_value ("native"); + octave_value mode = (nargin > 1) ? args(1) : octave_value ("r"); + + octave_value arch = (nargin > 2) ? args(2) : octave_value ("native"); + + octave_value encoding = (nargin > 3) ? args(3) : octave_value ("utf-8"); int fid = -1; - octave::stream os = do_stream_open (args(0), mode, arch, "fopen", fid); + octave::stream os = do_stream_open (args(0), mode, arch, encoding, "fopen", + fid); if (os) retval = ovl (streams.insert (os), ""); @@ -641,11 +656,12 @@ } /* -## FIXME: Only have tests for query mode. Need others for regular fopen call. +## Further tests are in io.tst %!test # Uses hardcoded value of 1 for stdout -%! [name, mode, arch] = fopen (1); +%! [name, mode, arch, encoding] = fopen (1); %! assert (name, "stdout"); %! assert (mode, "w"); +%! assert (encoding, "utf-8"); %!test # Query of non-existent stream returns all "" %! [name, mode, arch] = fopen (-1);
--- a/libinterp/corefcn/oct-stream.cc Sat Feb 09 09:44:28 2019 -0800 +++ b/libinterp/corefcn/oct-stream.cc Sat Feb 09 17:41:48 2019 +0100 @@ -7484,7 +7484,7 @@ string_vector stream_list::get_info (int fid) const { - string_vector retval (3); + string_vector retval (4); if (fid < 0) return retval; @@ -7509,6 +7509,7 @@ retval(0) = os.name (); retval(1) = stream::mode_as_string (os.mode ()); retval(2) = mach_info::float_format_as_string (os.float_format ()); + retval(3) = os.encoding (); return retval; }
--- a/test/io.tst Sat Feb 09 09:44:28 2019 -0800 +++ b/test/io.tst Sat Feb 09 17:41:48 2019 +0100 @@ -495,7 +495,7 @@ %! assert (__prog_output_assert__ ("error:")); %!error <Invalid call to fopen> fopen () -%!error <Invalid call to fopen> fopen ("foo", "wb", "native", 1) +%!error <Invalid call to fopen> fopen ("foo", "wb", "native", "utf-8", 1) %!error fclose (0) %!error <Invalid call to fclose> fclose (1, 2) @@ -650,6 +650,34 @@ %! unlink (nm); %! endif +%!test # write to and read from file with encoding +%! temp_file = [tempname(), ".txt"]; +%! fid = fopen (temp_file, "wt", "n", "latin 1"); +%! unwind_protect +%! [name, mode, arch, codepage] = fopen (fid); +%! assert (name, temp_file); +%! assert (mode, "w"); +%! assert (codepage, "latin 1"); +%! fprintf (fid, "aäu %s\n", "AÄU"); +%! fclose (fid); +%! # open in binary mode +%! fid2 = fopen (temp_file, "rb"); +%! [name, mode, arch, codepage] = fopen (fid2); +%! assert (name, temp_file); +%! assert (mode, "rb"); +%! assert (codepage, "utf-8"); +%! read_binary = fread (fid2); +%! fclose (fid2); +%! assert (read_binary, [97 228 117 32 65 196 85 10].'); +%! # open in text mode with correct encoding +%! fid3 = fopen (temp_file, "rt", "n", "latin 1"); +%! read_text = fscanf (fid3, "%s"); +%! fclose (fid3); +%! assert (read_text, "aäuAÄU"); +%! unwind_protect_cleanup +%! unlink (temp_file); +%! end_unwind_protect + %!assert (fputs (1, 1),-1) %!error <Invalid call to fputs> fputs ()