changeset 30243:a4061ae5ff79

Use UTF-8 file API if supported by the HDF5 library. * m4/acinclude.m4 (OCTAVE_CHECK_HDF5_HAS_UTF8_API): Add test to check whether the HDF5 library uses the UTF-8 file API on Windows (HDF5 version 1.12 or newer). * configure.ac (HDF5): Run new configure test. * libinterp/corefcn/ls-hdf5.cc (hdf5_fstreambase::open_create), libinterp/corefcn/load-save.cc (load_save_system::get_file_format, load_save_system::save): Skip the kludge that was used to work with non-ASCII characters in HDF5 file names if the HDF5 version in use supports UTF-8 encoded file names.
author Markus Mützel <markus.muetzel@gmx.de>
date Tue, 19 Oct 2021 14:26:04 +0200
parents 33d895260fa4
children 4fe897f5db8e
files configure.ac libinterp/corefcn/load-save.cc libinterp/corefcn/ls-hdf5.cc m4/acinclude.m4
diffstat 4 files changed, 50 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- a/configure.ac	Sun Oct 17 12:28:51 2021 +0200
+++ b/configure.ac	Tue Oct 19 14:26:04 2021 +0200
@@ -1474,6 +1474,7 @@
   [hdf5.h], [H5Gget_num_objs], [], [],
   [warn_hdf5=
    OCTAVE_CHECK_HDF5_HAS_VER_16_API
+   OCTAVE_CHECK_HDF5_HAS_UTF8_API
    AC_DEFINE(HAVE_HDF5, 1,
      [Define to 1 if HDF5 is available and newer than version 1.6.])
    if test $have_msvc = yes; then
--- a/libinterp/corefcn/load-save.cc	Sun Oct 17 12:28:51 2021 +0200
+++ b/libinterp/corefcn/load-save.cc	Tue Oct 19 14:26:04 2021 +0200
@@ -330,7 +330,11 @@
   {
     load_save_format retval = UNKNOWN;
 
+#if defined (HAVE_HDF5_UTF8)
+    std::string ascii_fname = fname;
+#else
     std::string ascii_fname = sys::get_ASCII_filename (fname);
+#endif
 
 #if defined (HAVE_HDF5)
     // check this before we open the file
@@ -1481,10 +1485,15 @@
             if (append)
               error ("save: appending to HDF5 files is not implemented");
 
+#  if defined (HAVE_HDF5_UTF8)
+            bool write_header_info
+              = ! (append && H5Fis_hdf5 (fname.c_str ()) > 0);
+#  else
             std::string ascii_fname = sys::get_ASCII_filename (fname);
 
             bool write_header_info
               = ! (append && H5Fis_hdf5 (ascii_fname.c_str ()) > 0);
+#  endif
 
             hdf5_ofstream hdf5_file (fname.c_str (), mode);
 
--- a/libinterp/corefcn/ls-hdf5.cc	Sun Oct 17 12:28:51 2021 +0200
+++ b/libinterp/corefcn/ls-hdf5.cc	Tue Oct 19 14:26:04 2021 +0200
@@ -144,18 +144,25 @@
 #if defined (HAVE_HDF5)
   // Open the HDF5 file NAME.  If it does not exist, create the file.
 
+#  if defined (HAVE_HDF5_UTF8)
+  const char *fname = name;
+#  else
   std::string fname_str (name);
   std::string ascii_fname_str = octave::sys::get_ASCII_filename (fname_str);
-  const char *ascii_fname = ascii_fname_str.c_str ();
+  const char *fname = ascii_fname_str.c_str ();
+#  endif
 
   if (mode & std::ios::in)
-    file_id = H5Fopen (ascii_fname, H5F_ACC_RDONLY, octave_H5P_DEFAULT);
+    file_id = H5Fopen (fname, H5F_ACC_RDONLY, octave_H5P_DEFAULT);
   else if (mode & std::ios::out)
     {
-      if (mode & std::ios::app && H5Fis_hdf5 (ascii_fname) > 0)
-        file_id = H5Fopen (ascii_fname, H5F_ACC_RDWR, octave_H5P_DEFAULT);
+      if (mode & std::ios::app && H5Fis_hdf5 (fname) > 0)
+        file_id = H5Fopen (fname, H5F_ACC_RDWR, octave_H5P_DEFAULT);
       else
-#  if defined (OCTAVE_USE_WINDOWS_API)
+#  if defined (HAVE_HDF5_UTF8)
+        file_id = H5Fcreate (fname, H5F_ACC_TRUNC, octave_H5P_DEFAULT,
+                             octave_H5P_DEFAULT);
+#  else
         {
           // Check whether file already exists
           std::string abs_ascii_fname
@@ -163,7 +170,7 @@
           if (! abs_ascii_fname.empty ())
             {
               // Use the existing file
-              file_id = H5Fcreate (ascii_fname, H5F_ACC_TRUNC,
+              file_id = H5Fcreate (fname, H5F_ACC_TRUNC,
                                    octave_H5P_DEFAULT, octave_H5P_DEFAULT);
               if (file_id < 0)
                 std::ios::setstate (std::ios::badbit);
@@ -216,9 +223,6 @@
           ascii_fname = ascii_fname_str.c_str ();
           file_id = H5Fopen (ascii_fname, H5F_ACC_RDWR, octave_H5P_DEFAULT);
         }
-#  else
-        file_id = H5Fcreate (name, H5F_ACC_TRUNC, octave_H5P_DEFAULT,
-                             octave_H5P_DEFAULT);
 #  endif
     }
   if (file_id < 0)
--- a/m4/acinclude.m4	Sun Oct 17 12:28:51 2021 +0200
+++ b/m4/acinclude.m4	Tue Oct 19 14:26:04 2021 +0200
@@ -780,6 +780,33 @@
   fi
 ])
 dnl
+dnl Check whether the file API of the HDF5 library accepts UTF-8 encoded file names.
+dnl
+AC_DEFUN([OCTAVE_CHECK_HDF5_HAS_UTF8_API], [
+  AC_CACHE_CHECK([whether HDF5 library has UTF-8 file API],
+    [octave_cv_hdf5_has_utf8_api],
+    [case $host_os in
+      msdosmsvc | mingw*)
+        AC_LINK_IFELSE([AC_LANG_PROGRAM([[
+          #include <stddef.h>
+          const wchar_t *H5_get_utf16_str(const char *s);
+          ]], [[
+          H5_get_utf16_str ("");
+          ]])],
+          octave_cv_hdf5_has_utf8_api=yes,
+          octave_cv_hdf5_has_utf8_api=no)
+      ;;
+      *)
+        ## Assume yes on all other platforms
+        octave_cv_hdf5_has_utf8_api=yes
+      ;;
+     esac
+    ])
+  if test $octave_cv_hdf5_has_utf8_api = yes; then
+    AC_DEFINE(HAVE_HDF5_UTF8, 1, [Define to 1 if HDF5 has UTF-8 file API.])
+  fi
+])
+dnl
 dnl Usage:
 dnl OCTAVE_CHECK_LIB(LIBRARY, DOC-NAME, WARN-MSG, HEADER, FUNC,
 dnl                  LANG, DOC-STRING, EXTRA-CHECK, PKG-CONFIG-NAME,