changeset 31149:f125ddb46055

dir_encoding: Use encoding from .oct-config file in current directory (bug #62761). * libinterp/corefcn/load-path.cc (load_path::read_dir_config), libinterp/corefcn/input.cc (input_system::dir_encoding): Use canonicalized path as key for the dir_encoding map. * test/file-encoding: Add tests for this feature. * test/Makefile.am: Add new folder to test suite.
author Markus Mützel <markus.muetzel@gmx.de>
date Wed, 13 Jul 2022 13:20:48 +0200
parents 52b8b0c1baf9
children b726aff748e3
files libinterp/corefcn/input.cc libinterp/corefcn/load-path.cc test/Makefile.am test/file-encoding/.oct-config test/file-encoding/CP1251/.oct-config test/file-encoding/CP1251/test_CP1251.m test/file-encoding/file-encoding.tst test/file-encoding/module.mk test/file-encoding/test_CP1252.m
diffstat 9 files changed, 187 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/libinterp/corefcn/input.cc	Tue Jul 12 17:52:20 2022 +0200
+++ b/libinterp/corefcn/input.cc	Wed Jul 13 13:20:48 2022 +0200
@@ -44,6 +44,7 @@
 #include "file-ops.h"
 #include "iconv-wrappers.h"
 #include "localcharset-wrapper.h"
+#include "oct-env.h"
 #include "oct-string.h"
 #include "quit.h"
 #include "str-vec.h"
@@ -573,8 +574,10 @@
   std::string input_system::dir_encoding (const std::string& dir)
   {
     std::string enc = m_mfile_encoding;
+    // use canonicalized path as key
+    const std::string key = sys::canonicalize_file_name (dir);
 
-    auto enc_it = m_dir_encoding.find (load_path_dir (dir));
+    auto enc_it = m_dir_encoding.find (key);
     if (enc_it != m_dir_encoding.end ())
       enc = enc_it->second;
 
--- a/libinterp/corefcn/load-path.cc	Tue Jul 12 17:52:20 2022 +0200
+++ b/libinterp/corefcn/load-path.cc	Wed Jul 13 13:20:48 2022 +0200
@@ -1204,9 +1204,12 @@
   void
   load_path::read_dir_config (const std::string& dir) const
   {
+    // use canonicalized path as key
+    const std::string key = sys::canonicalize_file_name (dir);
+
     // read file with directory configuration
-    std::string conf_file = dir + sys::file_ops::dir_sep_str ()
-                            + ".oct-config";
+    const std::string
+    conf_file = key + sys::file_ops::dir_sep_str () + ".oct-config";
 
     FILE* cfile = sys::fopen (conf_file, "rb");
 
@@ -1216,7 +1219,7 @@
         input_system& input_sys = __get_input_system__ ();
 
         std::string enc_val = "delete";
-        input_sys.set_dir_encoding (dir, enc_val);
+        input_sys.set_dir_encoding (key, enc_val);
         return;
       }
 
@@ -1257,7 +1260,7 @@
 
             // set encoding for this directory in input system
             input_system& input_sys = __get_input_system__ ();
-            input_sys.set_dir_encoding (dir, enc_val);
+            input_sys.set_dir_encoding (key, enc_val);
             return;
           }
       }
--- a/test/Makefile.am	Tue Jul 12 17:52:20 2022 +0200
+++ b/test/Makefile.am	Wed Jul 13 13:20:48 2022 +0200
@@ -114,6 +114,7 @@
 include colon-op/module.mk
 include ctor-vs-method/module.mk
 include fcn-handle/module.mk
+include file-encoding/module.mk
 include json/module.mk
 include jupyter-notebook/module.mk
 include local-functions/module.mk
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/file-encoding/.oct-config	Wed Jul 13 13:20:48 2022 +0200
@@ -0,0 +1,1 @@
+encoding=windows-1252
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/file-encoding/CP1251/.oct-config	Wed Jul 13 13:20:48 2022 +0200
@@ -0,0 +1,1 @@
+encoding=windows-1251
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/file-encoding/CP1251/test_CP1251.m	Wed Jul 13 13:20:48 2022 +0200
@@ -0,0 +1,33 @@
+########################################################################
+##
+## Copyright (C) 2022 The Octave Project Developers
+##
+## See the file COPYRIGHT.md in the top-level directory of this
+## distribution or <https://octave.org/copyright/>.
+##
+## This program is free software; you can redistribute it and/or
+## modify it under the terms of the GNU General Public License as
+## published by the Free Software Foundation; either version 3 of the
+## License, or (at your option) any later version.
+##
+## This program is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; if not, see <http://www.gnu.org/licenses/>.
+##
+########################################################################
+
+## -*- texinfo -*-
+## @deftypefn {} {} test_CP1251 ()
+## Test function with some characters from CP1251
+##
+## ÄÖÜäöü ŠŽšž
+##
+## @end deftypefn
+
+function test_CP1251 ()
+  help ("test_CP1251");
+endfunction
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/file-encoding/file-encoding.tst	Wed Jul 13 13:20:48 2022 +0200
@@ -0,0 +1,99 @@
+########################################################################
+##
+## Copyright (C) 2022 The Octave Project Developers
+##
+## See the file COPYRIGHT.md in the top-level directory of this
+## distribution or <https://octave.org/copyright/>.
+##
+## This file is part of Octave.
+##
+## Octave is free software: you can redistribute it and/or modify it
+## under the terms of the GNU General Public License as published by
+## the Free Software Foundation, either version 3 of the License, or
+## (at your option) any later version.
+##
+## Octave is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with Octave; see the file COPYING.  If not, see
+## <https://www.gnu.org/licenses/>.
+##
+########################################################################
+
+## test file in current directory
+
+%!assert (dir_encoding ("."), "windows-1252")
+%!assert (dir_encoding (pwd ()), "windows-1252")
+
+%!test
+%! help_str = get_help_text ("test_CP1252");
+%! ## The help text contains the string "ÄÖÜäöü ŠŽšž".  Compare to the
+%! ## corresponding UTF-8 byte sequence to make sure this test is independent
+%! ## of how this .tst file is interpreted.
+%! ref_str = char ([195 132 195 150 195 156 195 164 195 182 195 188 32 ...
+%!                  197 160 197 189 197 161 197 190]);
+%! assert (strfind (help_str, ref_str));
+
+
+## test file in load path (relative)
+
+%!test
+%! path_orig = path ();
+%! unwind_protect
+%!   addpath ("CP1251");
+%!   assert (dir_encoding ("CP1251"), "windows-1251");
+%!   assert (dir_encoding (fullfile (pwd (), "CP1251")), "windows-1251");
+%! unwind_protect_cleanup
+%!   path (path_orig);
+%! end_unwind_protect
+
+%!test
+%! path_orig = path ();
+%! unwind_protect
+%!   addpath ("CP1251");
+%!   help_str = get_help_text ("test_CP1251");
+%!   ## The help text contains the string "ДЦЬдць ЉЋљћ".  Compare to the
+%!   ## corresponding UTF-8 byte sequence to make sure this test is independent
+%!   ## of how this .tst file is interpreted.
+%!   ref_str = char ([208 148 208 166 208 172 208 180 209 134 209 140 32 ...
+%!                    208 137 208 139 209 153 209 155]);
+%!   assert (strfind (help_str, ref_str));
+%! unwind_protect_cleanup
+%!   path (path_orig);
+%! end_unwind_protect
+
+
+## test file in load path (absolute)
+
+%!test
+%! clear all;  # make sure files are re-parsed
+%! path_orig = path ();
+%! unwind_protect
+%!   new_path = canonicalize_file_name ("CP1251");
+%!   addpath (new_path);
+%!   assert (dir_encoding (new_path), "windows-1251");
+%!   assert (dir_encoding ("CP1251"), "windows-1251");
+%!   assert (dir_encoding (fullfile (pwd (), "CP1251")), "windows-1251");
+%! unwind_protect_cleanup
+%!   path (path_orig);
+%! end_unwind_protect
+
+%!test
+%! clear all;  # make sure files are re-parsed
+%! path_orig = path ();
+%! unwind_protect
+%!   addpath (canonicalize_file_name ("CP1251"));
+%!   help_str = get_help_text ("test_CP1251");
+%!   ## The help text contains the string "ДЦЬдць ЉЋљћ".  Compare to the UTF-8
+%!   ## byte sequence to make sure this test is independent of how this .tst
+%!   ## file is interpreted.
+%!   ref_str = char ([208 148 208 166 208 172 208 180 209 134 209 140 32 ...
+%!                    208 137 208 139 209 153 209 155]);
+%!   assert (strfind (help_str, ref_str));
+%! unwind_protect_cleanup
+%!   path (path_orig);
+%! end_unwind_protect
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/file-encoding/module.mk	Wed Jul 13 13:20:48 2022 +0200
@@ -0,0 +1,8 @@
+pkg_TEST_FILES = \
+    %reldir%/.oct-config \
+    %reldir%/file-encoding.tst \
+    %reldir%/test_CP1252.m \
+    %reldir%/CP1251/.oct-config \
+    %reldir%/CP1251/test_CP1251.m
+
+TEST_FILES += $(pkg_TEST_FILES)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/file-encoding/test_CP1252.m	Wed Jul 13 13:20:48 2022 +0200
@@ -0,0 +1,33 @@
+########################################################################
+##
+## Copyright (C) 2022 The Octave Project Developers
+##
+## See the file COPYRIGHT.md in the top-level directory of this
+## distribution or <https://octave.org/copyright/>.
+##
+## This program is free software; you can redistribute it and/or
+## modify it under the terms of the GNU General Public License as
+## published by the Free Software Foundation; either version 3 of the
+## License, or (at your option) any later version.
+##
+## This program is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; if not, see <http://www.gnu.org/licenses/>.
+##
+########################################################################
+
+## -*- texinfo -*-
+## @deftypefn {} {} test_CP1252 ()
+## Test function with some characters from CP1252
+##
+## ÄÖÜäöü ŠŽšž
+##
+## @end deftypefn
+
+function test_CP1252 ()
+  help ("test_CP1252");
+endfunction