Mercurial > octave
changeset 22339:966f75d09cb0
maint: rename xzip.cc to gzip.cc so Octave does think there's a xzip function.
* libinterp/dldfcn/xzip.cc: move to gzip.cc. Octave expects oct files to at
the very least, have a function named like the file.
* libinterp/dldfcn/gzip.cc: moved from xzip.cc. Use the name of the most
recognizable function inside it.
* libinterp/dldfcn/module-files: change filename.
author | Carnë Draug <carandraug@octave.org> |
---|---|
date | Thu, 18 Aug 2016 15:15:37 +0100 |
parents | e318739b0edd |
children | 9b87458451f8 |
files | libinterp/dldfcn/gzip.cc libinterp/dldfcn/module-files libinterp/dldfcn/xzip.cc |
diffstat | 3 files changed, 783 insertions(+), 783 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libinterp/dldfcn/gzip.cc Thu Aug 18 15:15:37 2016 +0100 @@ -0,0 +1,782 @@ +// Copyright (C) 2016 Carnë Draug +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + +//! Octave interface to the compression and uncompression libraries. +/*! + This was originally implemented as an m file which directly called + bzip2 and gzip applications. This may look simpler but causes some + issues (see bug #43431) because we have no control over the output + file: + + - created file is always in the same directory as the original file; + - automatically skip files that already have gz/bz2/etc extension; + - some older versions lack the --keep option. + + In addition, because system() does not have a method that allows + passing a list of arguments, there is the issue of having to escape + filenames. + + A solution is to pipe file contents into the applications instead of + filenames. However, that solution causes: + + # missing file header with original file information; + # implementing ourselves the recursive transversion of directories; + # do the above in a m file which will be slow; + # popen2 is frail on windows. + +*/ + +#if defined (HAVE_CONFIG_H) +# include "config.h" +#endif + +#include <cstdio> +#include <cstring> + +#include <string> +#include <list> +#include <functional> +#include <stdexcept> +#include <iostream> +#include <fstream> + +#if defined (HAVE_BZLIB_H) +# include <bzlib.h> +#endif + +#if defined (HAVE_ZLIB_H) +# include <zlib.h> +#endif + +#include "Array.h" +#include "str-vec.h" +#include "glob-match.h" +#include "file-ops.h" +#include "dir-ops.h" +#include "file-stat.h" +#include "oct-env.h" + +#include "defun-dld.h" +#include "defun-int.h" +#include "errwarn.h" + +namespace octave +{ + //! RIIA wrapper for std::FILE* + /*! If error handling is available for failing to close the file, use + the close method which throws. + + If the file has been closed, fp is set to nullptr. Remember that + behavior is undefined if the value of the pointer stream is used + after fclose. + */ + + class CFile + { + public: + std::FILE* fp; + + CFile (const std::string& path, const std::string& mode) + { + fp = std::fopen (path.c_str (), mode.c_str ()); + if (! fp) + throw std::runtime_error ("unable to open file"); + } + + void + close (void) + { + if (std::fclose (fp)) + throw std::runtime_error ("unable to close file"); + fp = nullptr; + } + + ~CFile () + { + if (fp) + std::fclose (fp); + } + }; + +#if defined (HAVE_BZ2) + + class bz2 + { + private: + class zipper + { + private: + int status = BZ_OK; + CFile source; + CFile dest; + BZFILE* bz; + + public: + zipper (const std::string& source_path, const std::string& dest_path) + : source (source_path, "rb"), dest (dest_path, "wb") + { + bz = BZ2_bzWriteOpen (&status, dest.fp, 9, 0, 30); + if (status != BZ_OK) + throw std::runtime_error ("failed to open bzip2 stream"); + } + + void + deflate (void) + { + const std::size_t buf_len = 8192; + char buf[buf_len]; + std::size_t n_read; + while ((n_read = std::fread (buf, sizeof (buf[0]), buf_len, source.fp)) != 0) + { + if (std::ferror (source.fp)) + throw std::runtime_error ("failed to read from source file"); + BZ2_bzWrite (&status, bz, buf, n_read); + if (status == BZ_IO_ERROR) + throw std::runtime_error ("failed to write or compress"); + } + if (std::ferror (source.fp)) + throw std::runtime_error ("failed to read from source file"); + } + + void + close (void) + { + int abandon = (status == BZ_IO_ERROR) ? 1 : 0; + BZ2_bzWriteClose (&status, bz, abandon, 0, 0); + if (status != BZ_OK) + throw std::runtime_error ("failed to close bzip2 stream"); + bz = nullptr; + + // We have no error handling for failing to close source, let + // the destructor close it. + dest.close (); + } + + ~zipper () + { + if (bz != nullptr) + BZ2_bzWriteClose (&status, bz, 1, 0, 0); + } + }; + + public: + static const constexpr char* extension = ".bz2"; + + static void + zip (const std::string& source_path, const std::string& dest_path) + { + bz2::zipper z (source_path, dest_path); + z.deflate (); + z.close (); + } + + }; + +#endif + + // Note about zlib and gzip + // + // gzip is a format for compressed single files. zlib is a format + // designed for in-memory and communication channel applications. + // gzip uses the same format internally for the compressed data but + // has different headers and trailers. + // + // zlib is also a library but gzip is not. Very old versions of zlib do + // not include functions to create useful gzip headers and trailers: + // + // Note that you cannot specify special gzip header contents (e.g. + // a file name or modification date), nor will inflate tell you what + // was in the gzip header. If you need to customize the header or + // see what's in it, you can use the raw deflate and inflate + // operations and the crc32() function and roll your own gzip + // encoding and decoding. Read the gzip RFC 1952 for details of the + // header and trailer format. + // zlib FAQ + // + // Recent versions (on which we are already dependent) have deflateInit2() + // to do it. We still need to get the right metadata for the header + // ourselves though. + // + // The header is defined in RFC #1952 + // GZIP file format specification version 4.3 + + +#if defined (HAVE_Z) + + class gz + { + private: + + // Util class to get a non-const char* + class uchar_array + { + public: + // Bytef is a typedef for unsigned char + unsigned char* p; + + uchar_array (const std::string& str) + { + p = new Bytef[str.length () +1]; + std::strcpy (reinterpret_cast<char*> (p), str.c_str ()); + } + + ~uchar_array (void) { delete[] p; } + }; + + class gzip_header : public gz_header + { + private: + // This must be kept for gz_header.name + uchar_array basename; + + public: + gzip_header (const std::string& source_path) + : basename (octave::sys::env::base_pathname (source_path)) + { + const octave::sys::file_stat source_stat (source_path); + if (! source_stat) + throw std::runtime_error ("unable to stat source file"); + + // time_t may be a signed int in which case it will be a + // positive number so it is safe to uLong. Or is it? Can + // unix_time really never be negative? + time = uLong (source_stat.mtime ().unix_time ()); + + // If FNAME is set, an original file name is present, + // terminated by a zero byte. The name must consist of ISO + // 8859-1 (LATIN-1) characters; on operating systems using + // EBCDIC or any other character set for file names, the name + // must be translated to the ISO LATIN-1 character set. This + // is the original name of the file being compressed, with any + // directory components removed, and, if the file being + // compressed is on a file system with case insensitive names, + // forced to lower case. + name = basename.p; + + // If we don't set it to Z_NULL, then it will set FCOMMENT (4th bit) + // on the FLG byte, and then write {0, 3} comment. + comment = Z_NULL; + + // Seems to already be the default but we are not taking chances. + extra = Z_NULL; + + // We do not want a CRC for the header. That would be only 2 more + // bytes, and maybe it would be a good thing but we want to generate + // gz files similar to the default gzip application. + hcrc = 0; + + // OS (Operating System): + // 0 - FAT filesystem (MS-DOS, OS/2, NT/Win32) + // 1 - Amiga + // 2 - VMS (or OpenVMS) + // 3 - Unix + // 4 - VM/CMS + // 5 - Atari TOS + // 6 - HPFS filesystem (OS/2, NT) + // 7 - Macintosh + // 8 - Z-System + // 9 - CP/M + // 10 - TOPS-20 + // 11 - NTFS filesystem (NT) + // 12 - QDOS + // 13 - Acorn RISCOS + // 255 - unknown + // + // The list is problematic because it mixes OS and filesystem. It + // also does not specify whether filesystem relates to source or + // destination file. + +#if defined (__WIN32__) + // Or should it be 11? + os = 0; +#elif defined (__APPLE__) + os = 7; +#else + // Unix by default? + os = 3; +#endif + } + }; + + class zipper + { + private: + CFile source; + CFile dest; + gzip_header header; + z_stream* strm; + + public: + zipper (const std::string& source_path, const std::string& dest_path) + : source (source_path, "rb"), dest (dest_path, "wb"), + header (source_path), strm (new z_stream) + { + strm->zalloc = Z_NULL; + strm->zfree = Z_NULL; + strm->opaque = Z_NULL; + } + + void + deflate () + { + // int deflateInit2 (z_streamp strm, + // int level, // compression level (default is 8) + // int method, + // int windowBits, // 15 (default) + 16 (gzip format) + // int memLevel, // memory usage (default is 8) + // int strategy); + int status = deflateInit2 (strm, 8, Z_DEFLATED, 31, 8, + Z_DEFAULT_STRATEGY); + if (status != Z_OK) + throw std::runtime_error ("failed to open zlib stream"); + + deflateSetHeader (strm, &header); + + const std::size_t buf_len = 8192; + unsigned char buf_in[buf_len]; + unsigned char buf_out[buf_len]; + + while ((strm->avail_in = std::fread (buf_in, sizeof (buf_in[0]), + buf_len, source.fp)) != 0) + { + if (std::ferror (source.fp)) + throw std::runtime_error ("failed to read source file"); + + strm->next_in = buf_in; + const int flush = std::feof (source.fp) ? Z_FINISH : Z_NO_FLUSH; + + // If deflate returns Z_OK and with zero avail_out, it must be + // called again after making room in the output buffer because + // there might be more output pending. + do + { + strm->avail_out = buf_len; + strm->next_out = buf_out; + status = ::deflate (strm, flush); + if (status == Z_STREAM_ERROR) + throw std::runtime_error ("failed to deflate"); + + std::fwrite (buf_out, sizeof (buf_out[0]), + buf_len - strm->avail_out, dest.fp); + if (std::ferror (dest.fp)) + throw std::runtime_error ("failed to write file"); + } + while (strm->avail_out == 0); + + if (strm->avail_in != 0) + throw std::runtime_error ("failed to wrote file"); + } + } + + void + close (void) + { + if (deflateEnd (strm) != Z_OK) + throw std::runtime_error ("failed to close zlib stream"); + strm = nullptr; + + // We have no error handling for failing to close source, let + // the destructor close it. + dest.close (); + } + + ~zipper (void) + { + if (strm) + deflateEnd (strm); + delete strm; + } + }; + + public: + static const constexpr char* extension = ".gz"; + + static void + zip (const std::string& source_path, const std::string& dest_path) + { + gz::zipper z (source_path, dest_path); + z.deflate (); + z.close (); + } + }; + +#endif + + + template<typename X> + string_vector + xzip (const Array<std::string>& source_patterns, + const std::function<std::string(const std::string&)>& mk_dest_path) + { + std::list<std::string> dest_paths; + + std::function<void(const std::string&)> walk; + walk = [&walk, &mk_dest_path, &dest_paths] (const std::string& path) -> void + { + const octave::sys::file_stat fs (path); + // is_dir and is_reg will return false if failed to stat. + if (fs.is_dir ()) + { + octave::sys::dir_entry dir (path); + if (dir) + { + // Collect the whole list of filenames first, before recursion + // to avoid issues with infinite loop if the action generates + // files in the same directory (highly likely). + string_vector dirlist = dir.read (); + for (octave_idx_type i = 0; i < dirlist.numel (); i++) + if (dirlist(i) != "." && dirlist(i) != "..") + walk (octave::sys::file_ops::concat (path, dirlist(i))); + } + // Note that we skip any problem with directories. + } + else if (fs.is_reg ()) + { + const std::string dest_path = mk_dest_path (path); + try + { + X::zip (path, dest_path); + } + catch (...) + { + // Error "handling" is not including filename on the output list. + // Also remove created file which maybe was not even created + // in the first place. Note that it is possible for the file + // to exist in the first place and for X::zip to not have + // clobber it yet but we remove it anyway by design. + octave::sys::unlink (dest_path); + return; + } + dest_paths.push_front (dest_path); + } + // Skip all other file types and errors. + return; + }; + + for (octave_idx_type i = 0; i < source_patterns.numel (); i++) + { + const glob_match pattern (octave::sys::file_ops::tilde_expand (source_patterns(i))); + const string_vector filepaths = pattern.glob (); + for (octave_idx_type j = 0; j < filepaths.numel (); j++) + walk (filepaths(j)); + } + return string_vector (dest_paths); + } + + + template<typename X> + string_vector + xzip (const Array<std::string>& source_patterns) + { + const std::string ext = X::extension; + const std::function<std::string(const std::string&)> mk_dest_path + = [&ext] (const std::string& source_path) -> std::string + { + return source_path + ext; + }; + return xzip<X> (source_patterns, mk_dest_path); + } + + template<typename X> + string_vector + xzip (const Array<std::string>& source_patterns, const std::string& out_dir) + { + const std::string ext = X::extension; + const std::function<std::string(const std::string&)> mk_dest_path + = [&out_dir, &ext] (const std::string& source_path) -> std::string + { + const std::string basename = octave::sys::env::base_pathname (source_path); + return octave::sys::file_ops::concat (out_dir, basename + ext); + }; + + // We don't care if mkdir fails. Maybe it failed because it already + // exists, or maybe it can't bre created. If the first, then there's + // nothing to do, if the later, then it will be handled later. Any + // is to be handled by not listing files in the output. + octave::sys::mkdir (out_dir, 0777); + return xzip<X> (source_patterns, mk_dest_path); + } + + template<typename X> + static octave_value_list + xzip (const std::string& func_name, const octave_value_list& args) + { + const octave_idx_type nargin = args.length (); + if (nargin < 1 || nargin > 2) + print_usage (); + + const Array<std::string> source_patterns + = args(0).xcellstr_value ("%s: FILES must be a character array or cellstr", + func_name.c_str ()); + if (nargin == 1) + return octave_value (Cell (xzip<X> (source_patterns))); + else // nargin == 2 + { + const std::string out_dir = args(1).string_value (); + return octave_value (Cell (xzip<X> (source_patterns, out_dir))); + } + } +} + +DEFUN_DLD (gzip, args, , + doc: /* -*- texinfo -*- +@deftypefn {} {@var{filelist} =} gzip (@var{files}) +@deftypefnx {} {@var{filelist} =} gzip (@var{files}, @var{dir}) +Compress the list of files and directories specified in @var{files}. + +@var{files} is a character array or cell array of strings. Shell wildcards +in the filename such as @samp{*} or @samp{?} are accepted and expanded. +Each file is compressed separately and a new file with a @file{".gz"} +extension is created. The original files are not modified, but existing +compressed files will be silently overwritten. If a directory is +specified then @code{gzip} recursively compresses all files in the +directory. + +If @var{dir} is defined the compressed files are placed in this directory, +rather than the original directory where the uncompressed file resides. +Note that this does not replicate a directory tree in @var{dir} which may +lead to files overwriting each other if there are multiple files with the +same name. + +If @var{dir} does not exist it is created. + +The optional output @var{filelist} is a list of the compressed files. +@seealso{gunzip, unpack, bzip2, zip, tar} +@end deftypefn */) +{ +#if defined (HAVE_Z) + + return octave::xzip<octave::gz> ("gzip", args); + +#else + + octave_unused_parameter (args); + + err_disabled_feature ("gzip", "gzip"); + +#endif +} + +/* +%!error gzip () +%!error gzip ("1", "2", "3") +%!error <FILES must be a character array or cellstr|was unavailable or disabled> gzip (1) +*/ + +DEFUN_DLD (bzip2, args, , + doc: /* -*- texinfo -*- +@deftypefn {} {@var{filelist} =} bzip2 (@var{files}) +@deftypefnx {} {@var{filelist} =} bzip2 (@var{files}, @var{dir}) +Compress the list of files specified in @var{files}. + +@var{files} is a character array or cell array of strings. Shell wildcards +in the filename such as @samp{*} or @samp{?} are accepted and expanded. +Each file is compressed separately and a new file with a @file{".bz2"} +extension is created. The original files are not modified, but existing +compressed files will be silently overwritten. + +If @var{dir} is defined the compressed files are placed in this directory, +rather than the original directory where the uncompressed file resides. +Note that this does not replicate a directory tree in @var{dir} which may +lead to files overwriting each other if there are multiple files with the +same name. + +If @var{dir} does not exist it is created. + +The optional output @var{filelist} is a list of the compressed files. +@seealso{bunzip2, unpack, gzip, zip, tar} +@end deftypefn */) +{ +#if defined (HAVE_BZ2) + + return octave::xzip<octave::bz2> ("bzip2", args); + +#else + + octave_unused_parameter (args); + + err_disabled_feature ("bzip2", "bzip2"); + +#endif +} + +// Tests for both gzip/bzip2 and gunzip/bunzip2 +/* + +## Takes a single argument, a function handle for the test. This other +## function must accept two arguments, a directory for the tests, and +## a cell array with zip function, unzip function, and file extension. + +%!function run_test_function (test_function) +%! enabled_zippers = struct ("zip", {}, "unzip", {}, "ext", {}); +%! if (__octave_config_info__ ().build_features.BZ2) +%! enabled_zippers(end+1).zip = @bzip2; +%! enabled_zippers(end).unzip = @bunzip2; +%! enabled_zippers(end).ext = ".bz2"; +%! endif +%! if (__octave_config_info__ ().build_features.Z) +%! enabled_zippers(end+1).zip = @gzip; +%! enabled_zippers(end).unzip = @gunzip; +%! enabled_zippers(end).ext = ".gz"; +%! endif +%! +%! for z = enabled_zippers +%! test_dir = tempname (); +%! if (! mkdir (test_dir)) +%! error ("unable to create directory for tests"); +%! endif +%! unwind_protect +%! test_function (test_dir, z) +%! unwind_protect_cleanup +%! confirm_recursive_rmdir (false, "local"); +%! rmdir (test_dir, "s"); +%! end_unwind_protect +%! endfor +%!endfunction + +%!function create_file (fpath, data) +%! fid = fopen (fpath, "wb"); +%! if (fid < 0) +%! error ("unable to open file for writing"); +%! endif +%! if (fwrite (fid, data, class (data)) != numel (data)) +%! error ("unable to write to file"); +%! endif +%! if (fflush (fid) || fclose (fid)) +%! error ("unable to flush or close file"); +%! endif +%!endfunction + +%!function unlink_or_error (filepath) +%! [err, msg] = unlink (filepath); +%! if (err) +%! error ("unable to remove file required for the test"); +%! endif +%!endfunction + +## Test with large files because of varied buffer size +%!function test_large_file (test_dir, z) +%! test_file = tempname (test_dir); +%! create_file (test_file, rand (500000, 1)); +%! md5 = hash ("md5", fileread (test_file)); +%! +%! z_file = [test_file z.ext]; +%! z_filelist = z.zip (test_file); +%! assert (z_filelist, {z_file}) +%! +%! unlink_or_error (test_file); +%! uz_filelist = z.unzip (z_file); +%! assert (uz_filelist, {test_file}) +%! +%! assert (hash ("md5", fileread (test_file)), md5) +%!endfunction +%!test run_test_function (@test_large_file) + +## Test that xzipped files are rexzipped +%!function test_z_z (test_dir, z) +%! ori_file = tempname (test_dir); +%! create_file (ori_file, rand (100, 1)); +%! md5_ori = hash ("md5", fileread (ori_file)); +%! +%! z_file = [ori_file z.ext]; +%! z_filelist = z.zip (ori_file); +%! assert (z_filelist, {z_file}) # check output +%! assert (exist (z_file), 2) # confirm file exists +%! assert (exist (ori_file), 2) # and did not remove original file +%! +%! unlink_or_error (ori_file); +%! uz_filelist = z.unzip (z_file); +%! assert (uz_filelist, {ori_file}) # bug #48598 +%! assert (hash ("md5", fileread (ori_file)), md5_ori) +%! assert (exist (z_file), 2) # bug #48597 +%! +%! ## xzip should dutifully re-xzip files even if they already are zipped +%! z_z_file = [z_file z.ext]; +%! z_z_filelist = z.zip (z_file); +%! assert (z_z_filelist, {z_z_file}) # check output +%! assert (exist (z_z_file), 2) # confirm file exists +%! assert (exist (z_file), 2) +%! +%! md5_z = hash ("md5", fileread (z_file)); +%! unlink_or_error (z_file); +%! uz_z_filelist = z.unzip (z_z_file); +%! assert (uz_z_filelist, {z_file}) # bug #48598 +%! assert (exist (z_z_file), 2) # bug #48597 +%! assert (hash ("md5", fileread (z_file)), md5_z) +%!endfunction +%!test run_test_function (@test_z_z) + +%!function test_xzip_dir (test_dir, z) # bug #43431 +%! fpaths = fullfile (test_dir, {"test1", "test2", "test3"}); +%! md5s = cell (1, 3); +%! for idx = 1:numel(fpaths) +%! create_file (fpaths{idx}, rand (100, 1)); +%! md5s(idx) = hash ("md5", fileread (fpaths{idx})); +%! endfor +%! +%! test_dir = [test_dir filesep()]; +%! +%! z_files = strcat (fpaths, z.ext); +%! z_filelist = z.zip (test_dir); +%! assert (sort (z_filelist), z_files(:)) +%! for idx = 1:numel(fpaths) +%! assert (exist (z_files{idx}), 2) +%! unlink_or_error (fpaths{idx}); +%! endfor +%! +%! ## only gunzip handles directory (bunzip2 should too though) +%! if (z.unzip == @gunzip) +%! uz_filelist = z.unzip (test_dir); +%! else +%! uz_filelist = cell (1, numel (z_filelist)); +%! for idx = 1:numel(z_filelist) +%! uz_filelist(idx) = z.unzip (z_filelist{idx}); +%! endfor +%! endif +%! assert (sort (uz_filelist), fpaths(:)) # bug #48598 +%! for idx = 1:numel(fpaths) +%! assert (hash ("md5", fileread (fpaths{idx})), md5s{idx}) +%! endfor +%!endfunction +%!test run_test_function (@test_xzip_dir) + +%!function test_save_to_dir (test_dir, z) +%! filename = "test-file"; +%! filepath = fullfile (test_dir, filename); +%! create_file (filepath, rand (100, 1)); +%! md5 = hash ("md5", fileread (filepath)); +%! +%! ## test with existing and non-existing directory +%! out_dirs = {tempname (test_dir), tempname (test_dir)}; +%! if (! mkdir (out_dirs{1})) +%! error ("unable to create directory for test"); +%! endif +%! for idx = 1:numel(out_dirs) +%! out_dir = out_dirs{idx}; +%! uz_file = fullfile (out_dir, filename); +%! z_file = [uz_file z.ext]; +%! +%! z_filelist = z.zip (filepath, out_dir); +%! assert (z_filelist, {z_file}) +%! assert (exist (z_file, "file"), 2) +%! +%! uz_filelist = z.unzip (z_file); +%! assert (uz_filelist, {uz_file}) # bug #48598 +%! +%! assert (hash ("md5", fileread (uz_file)), md5) +%! endfor +%!endfunction +%!test run_test_function (@test_save_to_dir) +*/
--- a/libinterp/dldfcn/module-files Thu Aug 18 16:18:27 2016 +0200 +++ b/libinterp/dldfcn/module-files Thu Aug 18 15:15:37 2016 +0100 @@ -16,7 +16,7 @@ convhulln.cc|$(QHULL_CPPFLAGS)|$(QHULL_LDFLAGS)|$(QHULL_LIBS) dmperm.cc|$(SPARSE_XCPPFLAGS)|$(SPARSE_XLDFLAGS)|$(SPARSE_XLIBS) fftw.cc|$(FFTW_XCPPFLAGS)|$(FFTW_XLDFLAGS)|$(FFTW_XLIBS) +gzip.cc|$(Z_CPPFLAGS) $(BZ2_CPPFLAGS)|$(Z_LDFLAGS) $(BZ2_LDFLAGS)|$(Z_LIBS) $(BZ2_LIBS) qr.cc|$(QRUPDATE_CPPFLAGS) $(SPARSE_XCPPFLAGS)|$(QRUPDATE_LDFLAGS) $(SPARSE_XLDFLAGS)|$(QRUPDATE_LIBS) $(SPARSE_XLIBS) symbfact.cc|$(SPARSE_XCPPFLAGS)|$(SPARSE_XLDFLAGS)|$(SPARSE_XLIBS) symrcm.cc|$(SPARSE_XCPPFLAGS)|$(SPARSE_XLDFLAGS)|$(SPARSE_XLIBS) -xzip.cc|$(Z_CPPFLAGS) $(BZ2_CPPFLAGS)|$(Z_LDFLAGS) $(BZ2_LDFLAGS)|$(Z_LIBS) $(BZ2_LIBS)
--- a/libinterp/dldfcn/xzip.cc Thu Aug 18 16:18:27 2016 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,782 +0,0 @@ -// Copyright (C) 2016 Carnë Draug -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program. If not, see <http://www.gnu.org/licenses/>. - -//! Octave interface to the compression and uncompression libraries. -/*! - This was originally implemented as an m file which directly called - bzip2 and gzip applications. This may look simpler but causes some - issues (see bug #43431) because we have no control over the output - file: - - - created file is always in the same directory as the original file; - - automatically skip files that already have gz/bz2/etc extension; - - some older versions lack the --keep option. - - In addition, because system() does not have a method that allows - passing a list of arguments, there is the issue of having to escape - filenames. - - A solution is to pipe file contents into the applications instead of - filenames. However, that solution causes: - - # missing file header with original file information; - # implementing ourselves the recursive transversion of directories; - # do the above in a m file which will be slow; - # popen2 is frail on windows. - -*/ - -#if defined (HAVE_CONFIG_H) -# include "config.h" -#endif - -#include <cstdio> -#include <cstring> - -#include <string> -#include <list> -#include <functional> -#include <stdexcept> -#include <iostream> -#include <fstream> - -#if defined (HAVE_BZLIB_H) -# include <bzlib.h> -#endif - -#if defined (HAVE_ZLIB_H) -# include <zlib.h> -#endif - -#include "Array.h" -#include "str-vec.h" -#include "glob-match.h" -#include "file-ops.h" -#include "dir-ops.h" -#include "file-stat.h" -#include "oct-env.h" - -#include "defun-dld.h" -#include "defun-int.h" -#include "errwarn.h" - -namespace octave -{ - //! RIIA wrapper for std::FILE* - /*! If error handling is available for failing to close the file, use - the close method which throws. - - If the file has been closed, fp is set to nullptr. Remember that - behavior is undefined if the value of the pointer stream is used - after fclose. - */ - - class CFile - { - public: - std::FILE* fp; - - CFile (const std::string& path, const std::string& mode) - { - fp = std::fopen (path.c_str (), mode.c_str ()); - if (! fp) - throw std::runtime_error ("unable to open file"); - } - - void - close (void) - { - if (std::fclose (fp)) - throw std::runtime_error ("unable to close file"); - fp = nullptr; - } - - ~CFile () - { - if (fp) - std::fclose (fp); - } - }; - -#if defined (HAVE_BZ2) - - class bz2 - { - private: - class zipper - { - private: - int status = BZ_OK; - CFile source; - CFile dest; - BZFILE* bz; - - public: - zipper (const std::string& source_path, const std::string& dest_path) - : source (source_path, "rb"), dest (dest_path, "wb") - { - bz = BZ2_bzWriteOpen (&status, dest.fp, 9, 0, 30); - if (status != BZ_OK) - throw std::runtime_error ("failed to open bzip2 stream"); - } - - void - deflate (void) - { - const std::size_t buf_len = 8192; - char buf[buf_len]; - std::size_t n_read; - while ((n_read = std::fread (buf, sizeof (buf[0]), buf_len, source.fp)) != 0) - { - if (std::ferror (source.fp)) - throw std::runtime_error ("failed to read from source file"); - BZ2_bzWrite (&status, bz, buf, n_read); - if (status == BZ_IO_ERROR) - throw std::runtime_error ("failed to write or compress"); - } - if (std::ferror (source.fp)) - throw std::runtime_error ("failed to read from source file"); - } - - void - close (void) - { - int abandon = (status == BZ_IO_ERROR) ? 1 : 0; - BZ2_bzWriteClose (&status, bz, abandon, 0, 0); - if (status != BZ_OK) - throw std::runtime_error ("failed to close bzip2 stream"); - bz = nullptr; - - // We have no error handling for failing to close source, let - // the destructor close it. - dest.close (); - } - - ~zipper () - { - if (bz != nullptr) - BZ2_bzWriteClose (&status, bz, 1, 0, 0); - } - }; - - public: - static const constexpr char* extension = ".bz2"; - - static void - zip (const std::string& source_path, const std::string& dest_path) - { - bz2::zipper z (source_path, dest_path); - z.deflate (); - z.close (); - } - - }; - -#endif - - // Note about zlib and gzip - // - // gzip is a format for compressed single files. zlib is a format - // designed for in-memory and communication channel applications. - // gzip uses the same format internally for the compressed data but - // has different headers and trailers. - // - // zlib is also a library but gzip is not. Very old versions of zlib do - // not include functions to create useful gzip headers and trailers: - // - // Note that you cannot specify special gzip header contents (e.g. - // a file name or modification date), nor will inflate tell you what - // was in the gzip header. If you need to customize the header or - // see what's in it, you can use the raw deflate and inflate - // operations and the crc32() function and roll your own gzip - // encoding and decoding. Read the gzip RFC 1952 for details of the - // header and trailer format. - // zlib FAQ - // - // Recent versions (on which we are already dependent) have deflateInit2() - // to do it. We still need to get the right metadata for the header - // ourselves though. - // - // The header is defined in RFC #1952 - // GZIP file format specification version 4.3 - - -#if defined (HAVE_Z) - - class gz - { - private: - - // Util class to get a non-const char* - class uchar_array - { - public: - // Bytef is a typedef for unsigned char - unsigned char* p; - - uchar_array (const std::string& str) - { - p = new Bytef[str.length () +1]; - std::strcpy (reinterpret_cast<char*> (p), str.c_str ()); - } - - ~uchar_array (void) { delete[] p; } - }; - - class gzip_header : public gz_header - { - private: - // This must be kept for gz_header.name - uchar_array basename; - - public: - gzip_header (const std::string& source_path) - : basename (octave::sys::env::base_pathname (source_path)) - { - const octave::sys::file_stat source_stat (source_path); - if (! source_stat) - throw std::runtime_error ("unable to stat source file"); - - // time_t may be a signed int in which case it will be a - // positive number so it is safe to uLong. Or is it? Can - // unix_time really never be negative? - time = uLong (source_stat.mtime ().unix_time ()); - - // If FNAME is set, an original file name is present, - // terminated by a zero byte. The name must consist of ISO - // 8859-1 (LATIN-1) characters; on operating systems using - // EBCDIC or any other character set for file names, the name - // must be translated to the ISO LATIN-1 character set. This - // is the original name of the file being compressed, with any - // directory components removed, and, if the file being - // compressed is on a file system with case insensitive names, - // forced to lower case. - name = basename.p; - - // If we don't set it to Z_NULL, then it will set FCOMMENT (4th bit) - // on the FLG byte, and then write {0, 3} comment. - comment = Z_NULL; - - // Seems to already be the default but we are not taking chances. - extra = Z_NULL; - - // We do not want a CRC for the header. That would be only 2 more - // bytes, and maybe it would be a good thing but we want to generate - // gz files similar to the default gzip application. - hcrc = 0; - - // OS (Operating System): - // 0 - FAT filesystem (MS-DOS, OS/2, NT/Win32) - // 1 - Amiga - // 2 - VMS (or OpenVMS) - // 3 - Unix - // 4 - VM/CMS - // 5 - Atari TOS - // 6 - HPFS filesystem (OS/2, NT) - // 7 - Macintosh - // 8 - Z-System - // 9 - CP/M - // 10 - TOPS-20 - // 11 - NTFS filesystem (NT) - // 12 - QDOS - // 13 - Acorn RISCOS - // 255 - unknown - // - // The list is problematic because it mixes OS and filesystem. It - // also does not specify whether filesystem relates to source or - // destination file. - -#if defined (__WIN32__) - // Or should it be 11? - os = 0; -#elif defined (__APPLE__) - os = 7; -#else - // Unix by default? - os = 3; -#endif - } - }; - - class zipper - { - private: - CFile source; - CFile dest; - gzip_header header; - z_stream* strm; - - public: - zipper (const std::string& source_path, const std::string& dest_path) - : source (source_path, "rb"), dest (dest_path, "wb"), - header (source_path), strm (new z_stream) - { - strm->zalloc = Z_NULL; - strm->zfree = Z_NULL; - strm->opaque = Z_NULL; - } - - void - deflate () - { - // int deflateInit2 (z_streamp strm, - // int level, // compression level (default is 8) - // int method, - // int windowBits, // 15 (default) + 16 (gzip format) - // int memLevel, // memory usage (default is 8) - // int strategy); - int status = deflateInit2 (strm, 8, Z_DEFLATED, 31, 8, - Z_DEFAULT_STRATEGY); - if (status != Z_OK) - throw std::runtime_error ("failed to open zlib stream"); - - deflateSetHeader (strm, &header); - - const std::size_t buf_len = 8192; - unsigned char buf_in[buf_len]; - unsigned char buf_out[buf_len]; - - while ((strm->avail_in = std::fread (buf_in, sizeof (buf_in[0]), - buf_len, source.fp)) != 0) - { - if (std::ferror (source.fp)) - throw std::runtime_error ("failed to read source file"); - - strm->next_in = buf_in; - const int flush = std::feof (source.fp) ? Z_FINISH : Z_NO_FLUSH; - - // If deflate returns Z_OK and with zero avail_out, it must be - // called again after making room in the output buffer because - // there might be more output pending. - do - { - strm->avail_out = buf_len; - strm->next_out = buf_out; - status = ::deflate (strm, flush); - if (status == Z_STREAM_ERROR) - throw std::runtime_error ("failed to deflate"); - - std::fwrite (buf_out, sizeof (buf_out[0]), - buf_len - strm->avail_out, dest.fp); - if (std::ferror (dest.fp)) - throw std::runtime_error ("failed to write file"); - } - while (strm->avail_out == 0); - - if (strm->avail_in != 0) - throw std::runtime_error ("failed to wrote file"); - } - } - - void - close (void) - { - if (deflateEnd (strm) != Z_OK) - throw std::runtime_error ("failed to close zlib stream"); - strm = nullptr; - - // We have no error handling for failing to close source, let - // the destructor close it. - dest.close (); - } - - ~zipper (void) - { - if (strm) - deflateEnd (strm); - delete strm; - } - }; - - public: - static const constexpr char* extension = ".gz"; - - static void - zip (const std::string& source_path, const std::string& dest_path) - { - gz::zipper z (source_path, dest_path); - z.deflate (); - z.close (); - } - }; - -#endif - - - template<typename X> - string_vector - xzip (const Array<std::string>& source_patterns, - const std::function<std::string(const std::string&)>& mk_dest_path) - { - std::list<std::string> dest_paths; - - std::function<void(const std::string&)> walk; - walk = [&walk, &mk_dest_path, &dest_paths] (const std::string& path) -> void - { - const octave::sys::file_stat fs (path); - // is_dir and is_reg will return false if failed to stat. - if (fs.is_dir ()) - { - octave::sys::dir_entry dir (path); - if (dir) - { - // Collect the whole list of filenames first, before recursion - // to avoid issues with infinite loop if the action generates - // files in the same directory (highly likely). - string_vector dirlist = dir.read (); - for (octave_idx_type i = 0; i < dirlist.numel (); i++) - if (dirlist(i) != "." && dirlist(i) != "..") - walk (octave::sys::file_ops::concat (path, dirlist(i))); - } - // Note that we skip any problem with directories. - } - else if (fs.is_reg ()) - { - const std::string dest_path = mk_dest_path (path); - try - { - X::zip (path, dest_path); - } - catch (...) - { - // Error "handling" is not including filename on the output list. - // Also remove created file which maybe was not even created - // in the first place. Note that it is possible for the file - // to exist in the first place and for X::zip to not have - // clobber it yet but we remove it anyway by design. - octave::sys::unlink (dest_path); - return; - } - dest_paths.push_front (dest_path); - } - // Skip all other file types and errors. - return; - }; - - for (octave_idx_type i = 0; i < source_patterns.numel (); i++) - { - const glob_match pattern (octave::sys::file_ops::tilde_expand (source_patterns(i))); - const string_vector filepaths = pattern.glob (); - for (octave_idx_type j = 0; j < filepaths.numel (); j++) - walk (filepaths(j)); - } - return string_vector (dest_paths); - } - - - template<typename X> - string_vector - xzip (const Array<std::string>& source_patterns) - { - const std::string ext = X::extension; - const std::function<std::string(const std::string&)> mk_dest_path - = [&ext] (const std::string& source_path) -> std::string - { - return source_path + ext; - }; - return xzip<X> (source_patterns, mk_dest_path); - } - - template<typename X> - string_vector - xzip (const Array<std::string>& source_patterns, const std::string& out_dir) - { - const std::string ext = X::extension; - const std::function<std::string(const std::string&)> mk_dest_path - = [&out_dir, &ext] (const std::string& source_path) -> std::string - { - const std::string basename = octave::sys::env::base_pathname (source_path); - return octave::sys::file_ops::concat (out_dir, basename + ext); - }; - - // We don't care if mkdir fails. Maybe it failed because it already - // exists, or maybe it can't bre created. If the first, then there's - // nothing to do, if the later, then it will be handled later. Any - // is to be handled by not listing files in the output. - octave::sys::mkdir (out_dir, 0777); - return xzip<X> (source_patterns, mk_dest_path); - } - - template<typename X> - static octave_value_list - xzip (const std::string& func_name, const octave_value_list& args) - { - const octave_idx_type nargin = args.length (); - if (nargin < 1 || nargin > 2) - print_usage (); - - const Array<std::string> source_patterns - = args(0).xcellstr_value ("%s: FILES must be a character array or cellstr", - func_name.c_str ()); - if (nargin == 1) - return octave_value (Cell (xzip<X> (source_patterns))); - else // nargin == 2 - { - const std::string out_dir = args(1).string_value (); - return octave_value (Cell (xzip<X> (source_patterns, out_dir))); - } - } -} - -DEFUN_DLD (gzip, args, , - doc: /* -*- texinfo -*- -@deftypefn {} {@var{filelist} =} gzip (@var{files}) -@deftypefnx {} {@var{filelist} =} gzip (@var{files}, @var{dir}) -Compress the list of files and directories specified in @var{files}. - -@var{files} is a character array or cell array of strings. Shell wildcards -in the filename such as @samp{*} or @samp{?} are accepted and expanded. -Each file is compressed separately and a new file with a @file{".gz"} -extension is created. The original files are not modified, but existing -compressed files will be silently overwritten. If a directory is -specified then @code{gzip} recursively compresses all files in the -directory. - -If @var{dir} is defined the compressed files are placed in this directory, -rather than the original directory where the uncompressed file resides. -Note that this does not replicate a directory tree in @var{dir} which may -lead to files overwriting each other if there are multiple files with the -same name. - -If @var{dir} does not exist it is created. - -The optional output @var{filelist} is a list of the compressed files. -@seealso{gunzip, unpack, bzip2, zip, tar} -@end deftypefn */) -{ -#if defined (HAVE_Z) - - return octave::xzip<octave::gz> ("gzip", args); - -#else - - octave_unused_parameter (args); - - err_disabled_feature ("gzip", "gzip"); - -#endif -} - -/* -%!error gzip () -%!error gzip ("1", "2", "3") -%!error <FILES must be a character array or cellstr|was unavailable or disabled> gzip (1) -*/ - -DEFUN_DLD (bzip2, args, , - doc: /* -*- texinfo -*- -@deftypefn {} {@var{filelist} =} bzip2 (@var{files}) -@deftypefnx {} {@var{filelist} =} bzip2 (@var{files}, @var{dir}) -Compress the list of files specified in @var{files}. - -@var{files} is a character array or cell array of strings. Shell wildcards -in the filename such as @samp{*} or @samp{?} are accepted and expanded. -Each file is compressed separately and a new file with a @file{".bz2"} -extension is created. The original files are not modified, but existing -compressed files will be silently overwritten. - -If @var{dir} is defined the compressed files are placed in this directory, -rather than the original directory where the uncompressed file resides. -Note that this does not replicate a directory tree in @var{dir} which may -lead to files overwriting each other if there are multiple files with the -same name. - -If @var{dir} does not exist it is created. - -The optional output @var{filelist} is a list of the compressed files. -@seealso{bunzip2, unpack, gzip, zip, tar} -@end deftypefn */) -{ -#if defined (HAVE_BZ2) - - return octave::xzip<octave::bz2> ("bzip2", args); - -#else - - octave_unused_parameter (args); - - err_disabled_feature ("bzip2", "bzip2"); - -#endif -} - -// Tests for both gzip/bzip2 and gunzip/bunzip2 -/* - -## Takes a single argument, a function handle for the test. This other -## function must accept two arguments, a directory for the tests, and -## a cell array with zip function, unzip function, and file extension. - -%!function run_test_function (test_function) -%! enabled_zippers = struct ("zip", {}, "unzip", {}, "ext", {}); -%! if (__octave_config_info__ ().build_features.BZ2) -%! enabled_zippers(end+1).zip = @bzip2; -%! enabled_zippers(end).unzip = @bunzip2; -%! enabled_zippers(end).ext = ".bz2"; -%! endif -%! if (__octave_config_info__ ().build_features.Z) -%! enabled_zippers(end+1).zip = @gzip; -%! enabled_zippers(end).unzip = @gunzip; -%! enabled_zippers(end).ext = ".gz"; -%! endif -%! -%! for z = enabled_zippers -%! test_dir = tempname (); -%! if (! mkdir (test_dir)) -%! error ("unable to create directory for tests"); -%! endif -%! unwind_protect -%! test_function (test_dir, z) -%! unwind_protect_cleanup -%! confirm_recursive_rmdir (false, "local"); -%! rmdir (test_dir, "s"); -%! end_unwind_protect -%! endfor -%!endfunction - -%!function create_file (fpath, data) -%! fid = fopen (fpath, "wb"); -%! if (fid < 0) -%! error ("unable to open file for writing"); -%! endif -%! if (fwrite (fid, data, class (data)) != numel (data)) -%! error ("unable to write to file"); -%! endif -%! if (fflush (fid) || fclose (fid)) -%! error ("unable to flush or close file"); -%! endif -%!endfunction - -%!function unlink_or_error (filepath) -%! [err, msg] = unlink (filepath); -%! if (err) -%! error ("unable to remove file required for the test"); -%! endif -%!endfunction - -## Test with large files because of varied buffer size -%!function test_large_file (test_dir, z) -%! test_file = tempname (test_dir); -%! create_file (test_file, rand (500000, 1)); -%! md5 = hash ("md5", fileread (test_file)); -%! -%! z_file = [test_file z.ext]; -%! z_filelist = z.zip (test_file); -%! assert (z_filelist, {z_file}) -%! -%! unlink_or_error (test_file); -%! uz_filelist = z.unzip (z_file); -%! assert (uz_filelist, {test_file}) -%! -%! assert (hash ("md5", fileread (test_file)), md5) -%!endfunction -%!test run_test_function (@test_large_file) - -## Test that xzipped files are rexzipped -%!function test_z_z (test_dir, z) -%! ori_file = tempname (test_dir); -%! create_file (ori_file, rand (100, 1)); -%! md5_ori = hash ("md5", fileread (ori_file)); -%! -%! z_file = [ori_file z.ext]; -%! z_filelist = z.zip (ori_file); -%! assert (z_filelist, {z_file}) # check output -%! assert (exist (z_file), 2) # confirm file exists -%! assert (exist (ori_file), 2) # and did not remove original file -%! -%! unlink_or_error (ori_file); -%! uz_filelist = z.unzip (z_file); -%! assert (uz_filelist, {ori_file}) # bug #48598 -%! assert (hash ("md5", fileread (ori_file)), md5_ori) -%! assert (exist (z_file), 2) # bug #48597 -%! -%! ## xzip should dutifully re-xzip files even if they already are zipped -%! z_z_file = [z_file z.ext]; -%! z_z_filelist = z.zip (z_file); -%! assert (z_z_filelist, {z_z_file}) # check output -%! assert (exist (z_z_file), 2) # confirm file exists -%! assert (exist (z_file), 2) -%! -%! md5_z = hash ("md5", fileread (z_file)); -%! unlink_or_error (z_file); -%! uz_z_filelist = z.unzip (z_z_file); -%! assert (uz_z_filelist, {z_file}) # bug #48598 -%! assert (exist (z_z_file), 2) # bug #48597 -%! assert (hash ("md5", fileread (z_file)), md5_z) -%!endfunction -%!test run_test_function (@test_z_z) - -%!function test_xzip_dir (test_dir, z) # bug #43431 -%! fpaths = fullfile (test_dir, {"test1", "test2", "test3"}); -%! md5s = cell (1, 3); -%! for idx = 1:numel(fpaths) -%! create_file (fpaths{idx}, rand (100, 1)); -%! md5s(idx) = hash ("md5", fileread (fpaths{idx})); -%! endfor -%! -%! test_dir = [test_dir filesep()]; -%! -%! z_files = strcat (fpaths, z.ext); -%! z_filelist = z.zip (test_dir); -%! assert (sort (z_filelist), z_files(:)) -%! for idx = 1:numel(fpaths) -%! assert (exist (z_files{idx}), 2) -%! unlink_or_error (fpaths{idx}); -%! endfor -%! -%! ## only gunzip handles directory (bunzip2 should too though) -%! if (z.unzip == @gunzip) -%! uz_filelist = z.unzip (test_dir); -%! else -%! uz_filelist = cell (1, numel (z_filelist)); -%! for idx = 1:numel(z_filelist) -%! uz_filelist(idx) = z.unzip (z_filelist{idx}); -%! endfor -%! endif -%! assert (sort (uz_filelist), fpaths(:)) # bug #48598 -%! for idx = 1:numel(fpaths) -%! assert (hash ("md5", fileread (fpaths{idx})), md5s{idx}) -%! endfor -%!endfunction -%!test run_test_function (@test_xzip_dir) - -%!function test_save_to_dir (test_dir, z) -%! filename = "test-file"; -%! filepath = fullfile (test_dir, filename); -%! create_file (filepath, rand (100, 1)); -%! md5 = hash ("md5", fileread (filepath)); -%! -%! ## test with existing and non-existing directory -%! out_dirs = {tempname (test_dir), tempname (test_dir)}; -%! if (! mkdir (out_dirs{1})) -%! error ("unable to create directory for test"); -%! endif -%! for idx = 1:numel(out_dirs) -%! out_dir = out_dirs{idx}; -%! uz_file = fullfile (out_dir, filename); -%! z_file = [uz_file z.ext]; -%! -%! z_filelist = z.zip (filepath, out_dir); -%! assert (z_filelist, {z_file}) -%! assert (exist (z_file, "file"), 2) -%! -%! uz_filelist = z.unzip (z_file); -%! assert (uz_filelist, {uz_file}) # bug #48598 -%! -%! assert (hash ("md5", fileread (uz_file)), md5) -%! endfor -%!endfunction -%!test run_test_function (@test_save_to_dir) -*/