changeset 31149:7d4cf04665e6

oruntests.m: Convert read file from input encoding (bug #62780). * scripts/testfun/oruntests.m (has_functions, has_tests): Convert read binary data from input encoding to UTF-8. * test/file-encoding: Add tests for this use case.
author Markus Mützel <markus.muetzel@gmx.de>
date Wed, 20 Jul 2022 16:37:58 +0200
parents 94998332d787
children 60f32b7455e3
files scripts/testfun/oruntests.m test/file-encoding/CP1251/test_CP1251.m test/file-encoding/file-encoding.tst test/file-encoding/test_CP1252.m
diffstat 4 files changed, 65 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/scripts/testfun/oruntests.m	Mon Jul 18 17:30:08 2022 +0200
+++ b/scripts/testfun/oruntests.m	Wed Jul 20 16:37:58 2022 +0200
@@ -119,6 +119,11 @@
     endif
     str = fread (fid, "*char")';
     fclose (fid);
+    enc = dir_encoding (fileparts (f));
+    if (! strcmp (enc, "utf-8"))
+      ## convert from input encoding to UTF-8
+      str = native2unicode (uint8 (str), enc);
+    endif
     retval = ! isempty (regexp (str,'^(?:DEFUN|DEFUN_DLD|DEFUNX)\>',
                                     'lineanchors', 'once'));
   elseif (n > 2 && strcmpi (f((end-1):end), ".m"))
@@ -138,6 +143,11 @@
 
   str = fread (fid, "*char").';
   fclose (fid);
+  enc = dir_encoding (fileparts (f));
+  if (! strcmp (enc, "utf-8"))
+    ## convert from input encoding to UTF-8
+    str = native2unicode (uint8 (str), enc);
+  endif
   retval = ! isempty (regexp (str,
                               '^%!(assert|error|fail|test|xtest|warning)',
                               'lineanchors', 'once'));
--- a/test/file-encoding/CP1251/test_CP1251.m	Mon Jul 18 17:30:08 2022 +0200
+++ b/test/file-encoding/CP1251/test_CP1251.m	Wed Jul 20 16:37:58 2022 +0200
@@ -31,3 +31,7 @@
 function test_CP1251 ()
   help ("test_CP1251");
 endfunction
+
+%!assert (double ("ÄÖÜäöü ŠŽšž"),
+%!        [208 148 208 166 208 172 208 180 209 134 209 140 32 ...
+%!         208 137 208 139 209 153 209 155])
--- a/test/file-encoding/file-encoding.tst	Mon Jul 18 17:30:08 2022 +0200
+++ b/test/file-encoding/file-encoding.tst	Wed Jul 20 16:37:58 2022 +0200
@@ -23,6 +23,10 @@
 ##
 ########################################################################
 
+## Note: The cache of dir_encoding from .oct-config files in the load path
+## persists even after removing the folder from the load path.
+## Explicitly delete it when removing the path.
+
 ## test file in current directory
 
 %!assert (dir_encoding ("."), "windows-1252")
@@ -48,6 +52,7 @@
 %!   assert (dir_encoding (fullfile (pwd (), "CP1251")), "windows-1251");
 %! unwind_protect_cleanup
 %!   path (path_orig);
+%!   dir_encoding (canonicalize_file_name ("CP1251"), "delete");
 %! end_unwind_protect
 
 %!test
@@ -63,6 +68,7 @@
 %!   assert (strfind (help_str, ref_str));
 %! unwind_protect_cleanup
 %!   path (path_orig);
+%!   dir_encoding (canonicalize_file_name ("CP1251"), "delete");
 %! end_unwind_protect
 
 
@@ -79,6 +85,7 @@
 %!   assert (dir_encoding (fullfile (pwd (), "CP1251")), "windows-1251");
 %! unwind_protect_cleanup
 %!   path (path_orig);
+%!   dir_encoding (canonicalize_file_name ("CP1251"), "delete");
 %! end_unwind_protect
 
 %!test
@@ -95,5 +102,45 @@
 %!   assert (strfind (help_str, ref_str));
 %! unwind_protect_cleanup
 %!   path (path_orig);
+%!   dir_encoding (canonicalize_file_name ("CP1251"), "delete");
+%! end_unwind_protect
+
+
+## oruntests with file in current folder with .oct-config file
+%!test <*62780>
+%! ## wrap in "evalc" to suppress output to the log
+%! evalc ('oruntests (".");');
+
+## oruntests with file in different folder (not in load path) with
+## "dir_encoding"
+%!test <*62780>
+%! unwind_protect
+%!   dir_encoding (canonicalize_file_name ("CP1251"), "windows-1251");
+%!   ## use "evalc" to suppress output to the log
+%!   evalc ('oruntests ("CP1251");');
+%! unwind_protect_cleanup
+%!   dir_encoding (canonicalize_file_name ("CP1251"), "delete");
 %! end_unwind_protect
 
+## oruntests with file in different folder (not in load path) with
+## "__mfile_encoding__"
+%!test <*62780>
+%! old_mfile_encoding = __mfile_encoding__ ("windows-1251");
+%! unwind_protect
+%!   ## use "evalc" to suppress output to the log
+%!   evalc ('oruntests ("CP1251");');
+%! unwind_protect_cleanup
+%!   __mfile_encoding__ (old_mfile_encoding);
+%! end_unwind_protect
+
+## oruntests with file in different folder with .oct-config file (in load path)
+%!test <*62780>
+%! path_orig = path ();
+%! unwind_protect
+%!   addpath (canonicalize_file_name ("CP1251"));
+%!   ## use "evalc" to suppress output to the log
+%!   evalc ('oruntests ("CP1251");');
+%! unwind_protect_cleanup
+%!   path (path_orig);
+%!   dir_encoding (canonicalize_file_name ("CP1251"), "delete");
+%! end_unwind_protect
--- a/test/file-encoding/test_CP1252.m	Mon Jul 18 17:30:08 2022 +0200
+++ b/test/file-encoding/test_CP1252.m	Wed Jul 20 16:37:58 2022 +0200
@@ -31,3 +31,7 @@
 function test_CP1252 ()
   help ("test_CP1252");
 endfunction
+
+%!assert (double ("ÄÖÜäöü ŠŽšž"),
+%!        [195 132 195 150 195 156 195 164 195 182 195 188 32 ...
+%!         197 160 197 189 197 161 197 190])