Mercurial > octave
changeset 25615:ffc858064239
style fixes and commentary for get_ASCII_filename function (bug #54299)
* lo-sysdep.cc (get_ASCII_filename): Include some explanatory comments.
Style fixes.
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Fri, 13 Jul 2018 16:55:08 -0400 |
parents | 7af73a062e9e |
children | bd362df6277c |
files | liboctave/system/lo-sysdep.cc |
diffstat | 1 files changed, 89 insertions(+), 29 deletions(-) [+] |
line wrap: on
line diff
--- a/liboctave/system/lo-sysdep.cc Fri Jul 13 16:26:55 2018 -0400 +++ b/liboctave/system/lo-sysdep.cc Fri Jul 13 16:55:08 2018 -0400 @@ -196,77 +196,137 @@ return retval; } + // At quite a few places in the code we are passing file names as + // char arrays to external library functions. + + // When these functions try to locate the corresponding file on the + // disc, they need to use the wide character API on Windows to + // correctly open files with non-ASCII characters. + + // But they have no way of knowing which encoding we are using for + // the passed string. So they have no way of reliably converting to + // a wchar_t array. (I.e. there is no possible fix for these + // functions with current C or C++.) + + // To solve the dilemma, the function "get_ASCII_filename" first + // checks whether there are any non-ASCII characters in the passed + // file name. If there are not, it returns the original name. + + // Otherwise, it tries to obtain the short file name (8.3 naming + // scheme) which only consists of ASCII characters and are safe to + // pass. However, short file names can be disabled for performance + // reasons on the file system level with NTFS. So there is no + // guarantee that these exist. + + // If short file names are not stored, a hard link to the file is + // created. For this the path to the file is split at the deepest + // possible level that doesn't contain non-ASCII characters. At + // that level a hidden folder is created that holds the hard links. + // That means we need to have write access on that location. A path + // to that hard link is returned. + + // If the file system is FAT32, there are no hard links. But FAT32 + // always stores short file names. So we are safe. + + // ExFAT that is occasionally used on USB sticks and SD cards stores + // neither short file names nor does it support hard links. So for + // exFAT with this function, there is (currently) no way to generate + // a file name that is stripped from non-ASCII characters but still + // is valid. + + // For Unixy systems, this function does nothing. + std::string get_ASCII_filename (const std::string& orig_file_name) { #if defined (OCTAVE_USE_WINDOWS_API) - // Return file name that only contains ASCII characters that can be used - // to access the file orig_file_name. The original file must exist in the - // file system before calling this function. - // This is useful for passing file names to functions that are not aware - // of the character encoding we are using. + + // Return file name that only contains ASCII characters that can + // be used to access the file orig_file_name. The original file + // must exist in the file system before calling this function. + // This is useful for passing file names to functions that are not + // aware of the character encoding we are using. - // 1. Check whether filename contains non-ASCII (UTF-8) characters - std::string::const_iterator first_non_ASCII = - std::find_if (orig_file_name.begin (), orig_file_name.end (), - [](char c){return (c < 0 || c >= 128);}); + // 1. Check whether filename contains non-ASCII (UTF-8) characters. + + std::string::const_iterator first_non_ASCII + = std::find_if (orig_file_name.begin (), orig_file_name.end (), + [](char c) { return (c < 0 || c >= 128); }); + if (first_non_ASCII == orig_file_name.end ()) return orig_file_name; - // 2. Check if file system stores short filenames (always ASCII-only). + // 2. Check if file system stores short filenames (always + // ASCII-only). + const wchar_t *w_orig_file_name = u8_to_wstring (orig_file_name).c_str (); - // get short filename (8.3) from UTF-16 filename + + // Get short filename (8.3) from UTF-16 filename. + long length = GetShortPathNameW (w_orig_file_name, NULL, 0); - // Dynamically allocate the correct size - // (terminating null char was included in length) + // Dynamically allocate the correct size (terminating null char + // was included in length). + wchar_t *w_short_file_name = new wchar_t[length]; length = GetShortPathNameW (w_orig_file_name, w_short_file_name, length); - std::string short_file_name = u8_from_wstring (std::wstring (w_short_file_name)); + std::string short_file_name + = u8_from_wstring (std::wstring (w_short_file_name)); if (short_file_name.compare (orig_file_name) != 0) return short_file_name; - // 3. Create hard link with only-ASCII characters + // 3. Create hard link with only-ASCII characters. // Get longest possible part of path that only contains ASCII chars. - size_t pos = (std::string (orig_file_name.begin (), first_non_ASCII)). - find_last_of (octave::sys::file_ops::dir_sep_chars ()); + + std::string tmp_substr + = std::string (orig_file_name.begin (), first_non_ASCII); + + size_t pos + = tmp_substr.find_last_of (octave::sys::file_ops::dir_sep_chars ()); + std::string par_dir = orig_file_name.substr (0, pos+1); - // create .oct_ascii directory + // Create .oct_ascii directory. // FIXME: We need to have write permission in this location. + std::string oct_ascii_dir = par_dir + ".oct_ascii"; std::string test_dir = canonicalize_file_name (oct_ascii_dir); + if (test_dir.empty ()) { std::string msg; int status = octave::sys::mkdir (oct_ascii_dir, 0777, msg); + if (status < 0) return orig_file_name; - // set hidden property + + // Set hidden property. SetFileAttributesA (oct_ascii_dir.c_str (), FILE_ATTRIBUTE_HIDDEN); } - // create file from hash of full filename - std::string filename_hash = oct_ascii_dir + file_ops::dir_sep_str () + - octave::crypto::hash ("SHA1", orig_file_name); - std::string _filename_hash_ = canonicalize_file_name (filename_hash); - if (! _filename_hash_.empty ()) - return _filename_hash_; + // Create file from hash of full filename. + std::string filename_hash + = (oct_ascii_dir + file_ops::dir_sep_str () + + octave::crypto::hash ("SHA1", orig_file_name)); + + std::string abs_filename_hash = canonicalize_file_name (filename_hash); + + if (! abs_filename_hash.empty ()) + return abs_filename_hash; wchar_t w_filename_hash[filename_hash.length ()+1] = {0}; + for (size_t i=0; i < filename_hash.length (); i++) w_filename_hash[i] = filename_hash.at (i); + if (CreateHardLinkW (w_filename_hash, w_orig_file_name, NULL)) return filename_hash; - return orig_file_name; +#endif -#else return orig_file_name; -#endif } - } }