changeset 25532:7dad5fa7e88e

Add function for ASCII only file names (bug #49118). * lo-sysdep.[cc/h]: Add function "get_ASCII_filename" that constructs an ASCII-only file name corresponding to the input file name.
author Markus Mützel <markus.muetzel@gmx.de>
date Sat, 30 Jun 2018 20:11:05 +0200
parents c2dc2fb6359b
children d6850dd2a6b4
files liboctave/system/lo-sysdep.cc liboctave/system/lo-sysdep.h
diffstat 2 files changed, 76 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/liboctave/system/lo-sysdep.cc	Sat Jun 30 18:51:38 2018 +0200
+++ b/liboctave/system/lo-sysdep.cc	Sat Jun 30 20:11:05 2018 +0200
@@ -36,6 +36,8 @@
 #if defined (OCTAVE_USE_WINDOWS_API)
 #  include <windows.h>
 #  include <wchar.h>
+
+#  include "lo-hash.h"
 #endif
 
 namespace octave
@@ -194,5 +196,77 @@
       return retval;
     }
 
+    std::string
+    get_ASCII_filename (const std::string& orig_file_name)
+    {
+#if defined (OCTAVE_USE_WINDOWS_API)
+      // Return file name that only contains ASCII characters that can be used
+      // to access the file orig_file_name.  The original file must exist in the
+      // file system before calling this function.
+      // This is useful for passing file names to functions that are not aware
+      // of the character encoding we are using.
+
+      // 1. Check whether filename contains non-ASCII (UTF-8) characters
+      std::string::const_iterator first_non_ASCII = 
+                std::find_if (orig_file_name.begin (), orig_file_name.end (),
+                [](char c){return (c < 0 || c >= 128);});
+      if (first_non_ASCII == orig_file_name.end ())
+        return orig_file_name;
+
+      // 2. Check if file system stores short filenames (always ASCII-only).
+      const wchar_t *w_orig_file_name = u8_to_wstring (orig_file_name).c_str ();
+      // get short filename (8.3) from UTF-16 filename
+      long length = GetShortPathNameW (w_orig_file_name, NULL, 0);
+
+      // Dynamically allocate the correct size 
+      // (terminating null char was included in length)
+      wchar_t *w_short_file_name = new wchar_t[length];
+      length = GetShortPathNameW (w_orig_file_name, w_short_file_name, length);
+ 
+      std::string short_file_name = u8_from_wstring (std::wstring (w_short_file_name));
+
+      if (short_file_name.compare (orig_file_name) != 0)
+        return short_file_name;
+
+      // 3. Create hard link with only-ASCII characters
+      // Get longest possible part of path that only contains ASCII chars.
+      size_t pos = (std::string (orig_file_name.begin (), first_non_ASCII)).
+                        find_last_of (octave::sys::file_ops::dir_sep_chars ());
+      std::string par_dir = orig_file_name.substr (0, pos+1);
+      
+      // create .oct_ascii directory
+      // FIXME: We need to have write permission in this location.
+      std::string oct_ascii_dir = par_dir + ".oct_ascii";
+      std::string test_dir = canonicalize_file_name (oct_ascii_dir);
+      if (test_dir.empty ())
+      {
+        std::string msg;
+        int status = octave::sys::mkdir (oct_ascii_dir, 0777, msg);
+        if (status < 0)
+          return orig_file_name;
+        // set hidden property
+        SetFileAttributesA (oct_ascii_dir.c_str (), FILE_ATTRIBUTE_HIDDEN);
+      }
+
+      // create file from hash of full filename
+      std::string filename_hash = oct_ascii_dir + file_ops::dir_sep_str () +
+                                  octave::crypto::hash ("SHA1", orig_file_name);
+      std::string _filename_hash_ = canonicalize_file_name (filename_hash);
+      if (! _filename_hash_.empty ())
+        return _filename_hash_;
+
+      wchar_t w_filename_hash[filename_hash.length ()+1] = {0};
+      for (size_t i=0; i < filename_hash.length (); i++)
+        w_filename_hash[i] = filename_hash.at (i);
+      if (CreateHardLinkW (w_filename_hash, w_orig_file_name, NULL))
+        return filename_hash;
+
+      return orig_file_name;
+
+#else
+      return orig_file_name;
+#endif
+    }
+
   }
 }
--- a/liboctave/system/lo-sysdep.h	Sat Jun 30 18:51:38 2018 +0200
+++ b/liboctave/system/lo-sysdep.h	Sat Jun 30 20:11:05 2018 +0200
@@ -49,6 +49,8 @@
     extern std::wstring u8_to_wstring (const std::string&);
 
     extern std::string u8_from_wstring (const std::wstring&);
+
+    extern std::string get_ASCII_filename (const std::string& long_file_name);
   }
 }