changeset 22160:766f934db568

Rewrite gzip and bzip2 in C++ instead of using its applications (bug #43431) * bzip2.m, gzip.m, __xzip__.m: remove old implementation as m files that copy all files into a temporary directory and then call gzip or bzip2 application. Add several new tests and remove duplication of existing tests. * scripts/miscellaneous/module.mk: unlist removed files. * xzip.cc: new implementation of bzip2 and gzip functions making direct use of the libraries in C++. Also add more tests. * libinterp/dldfcn/module-files: list new file and required flags. * configure.ac: add check for bzip2 library.
author Carnë Draug <carandraug@octave.org>
date Sun, 26 Jun 2016 13:32:03 +0200
parents 63c806042c27
children 9babcd597676
files configure.ac libinterp/dldfcn/module-files libinterp/dldfcn/xzip.cc scripts/miscellaneous/bzip2.m scripts/miscellaneous/gzip.m scripts/miscellaneous/module.mk scripts/miscellaneous/private/__xzip__.m
diffstat 7 files changed, 712 insertions(+), 297 deletions(-) [+]
line wrap: on
line diff
--- a/configure.ac	Fri Jul 22 12:08:51 2016 -0400
+++ b/configure.ac	Sun Jun 26 13:32:03 2016 +0200
@@ -864,7 +864,15 @@
   AC_DEFINE(HAVE_ZLIB, 1, [Define to 1 if ZLIB is available.])
 fi
 
- ### Check for the LLVM library
+
+### Check for BZIP2 library.
+
+OCTAVE_CHECK_LIB(bz2, BZIP2,
+  [BZIP2 library not found.  Octave will not be able to compress or decompress bzip2 files.],
+  [bzlib.h], [BZ2_bzCompressInit])
+
+
+### Check for the LLVM library
 
 ENABLE_JIT=no
 AC_ARG_ENABLE([jit],
@@ -3367,6 +3375,9 @@
   ARPACK LDFLAGS:              $ARPACK_LDFLAGS
   ARPACK libraries:            $ARPACK_LIBS
   BLAS libraries:              $BLAS_LIBS
+  BZ2 CPPFLAGS:                $BZ2_CPPFLAGS
+  BZ2 LDFLAGS:                 $BZ2_LDFLAGS
+  BZ2 libraries:               $BZ2_LIBS
   CAMD CPPFLAGS:               $CAMD_CPPFLAGS
   CAMD LDFLAGS:                $CAMD_LDFLAGS
   CAMD libraries:              $CAMD_LIBS
--- a/libinterp/dldfcn/module-files	Fri Jul 22 12:08:51 2016 -0400
+++ b/libinterp/dldfcn/module-files	Sun Jun 26 13:32:03 2016 +0200
@@ -19,3 +19,4 @@
 qr.cc|$(QRUPDATE_CPPFLAGS) $(SPARSE_XCPPFLAGS)|$(QRUPDATE_LDFLAGS) $(SPARSE_XLDFLAGS)|$(QRUPDATE_LIBS) $(SPARSE_XLIBS)
 symbfact.cc|$(SPARSE_XCPPFLAGS)|$(SPARSE_XLDFLAGS)|$(SPARSE_XLIBS)
 symrcm.cc|$(SPARSE_XCPPFLAGS)|$(SPARSE_XLDFLAGS)|$(SPARSE_XLIBS)
+xzip.cc|$(Z_CPPFLAGS) $(BZ2_CPPFLAGS)|$(Z_LDFLAGS) $(BZ2_LDFLAGS)|$(Z_LIBS) $(BZ2_LIBS)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libinterp/dldfcn/xzip.cc	Sun Jun 26 13:32:03 2016 +0200
@@ -0,0 +1,698 @@
+// Copyright (C) 2016 Carnë Draug
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+//! Octave interface to the compression and uncompression libraries.
+/*!
+  This was originally implemented as an m file which directly called
+  bzip2 and gzip applications.  This may look simpler but causes some
+  issues (see bug #43431) because we have no control over the output
+  file:
+
+    - created file is always in the same directory as the original file;
+    - automatically skip files that already have gz/bz2/etc extension;
+    - some olders versions miss the --keep option.
+
+  In addition, because system() does not have a method that allows
+  passing a list of arguments, there is the issue of having to escape
+  filenames.
+
+  A solution is to pipe file contents into the applications instead of
+  filenames.  However, that solution causes:
+
+    # missing file header with original file information;
+    # implementing ourselves the recursive transversion of directories;
+    # do the above in a m file which will be slow;
+    # popen2 is frail on windows.
+
+*/
+
+#if defined (HAVE_CONFIG_H)
+#  include "config.h"
+#endif
+
+#include <cstdio>
+#include <cstring>
+
+#include <string>
+#include <list>
+#include <functional>
+#include <stdexcept>
+#include <iostream>
+#include <fstream>
+
+#ifdef HAVE_BZLIB_H
+#  include <bzlib.h>
+#endif
+
+#ifdef HAVE_ZLIB_H
+#  include <zlib.h>
+#endif
+
+#include "Array.h"
+#include "str-vec.h"
+#include "glob-match.h"
+#include "file-ops.h"
+#include "dir-ops.h"
+#include "file-stat.h"
+#include "oct-env.h"
+
+#include "defun-dld.h"
+#include "defun-int.h"
+#include "errwarn.h"
+
+class CFile
+{
+public:
+  FILE* fp;
+
+  CFile (const std::string& path, const std::string& mode)
+  {
+    fp = std::fopen (path.c_str (), mode.c_str ());
+    if (! fp)
+      throw std::runtime_error ("unable to open file");
+  }
+
+  ~CFile ()
+  {
+    if (std::fclose (fp))
+      // Not pedantic.  If this is dest, maybe it failed to flush
+      // so we should signal this before someone removes the source.
+      throw std::runtime_error ("unable to close file");
+  }
+};
+
+#ifdef HAVE_BZ2
+class bz2
+{
+private:
+  class zipper
+  {
+  private:
+    int status = BZ_OK;
+    CFile source;
+    CFile dest;
+    BZFILE* bz;
+
+  public:
+    zipper (const std::string& source_path, const std::string& dest_path)
+      : source (source_path, "rb"), dest (dest_path, "wb")
+    {
+      bz = BZ2_bzWriteOpen (&status, dest.fp, 9, 0, 30);
+      if (status != BZ_OK)
+        throw std::runtime_error ("failed to open bzip2 stream");
+    }
+
+    void
+    deflate (void)
+    {
+      const std::size_t buf_len = 8192;
+      char buf[buf_len];
+      std::size_t n_read;
+      while ((n_read = std::fread (buf, sizeof (buf[0]), buf_len, source.fp)) != 0)
+        {
+          if (std::ferror (source.fp))
+            throw std::runtime_error ("failed to read from source file");
+          BZ2_bzWrite (&status, bz, buf, n_read);
+          if (status == BZ_IO_ERROR)
+            throw std::runtime_error ("failed to write or compress");
+        }
+      if (std::ferror (source.fp))
+        throw std::runtime_error ("failed to read from source file");
+    }
+
+    ~zipper ()
+    {
+      int abandon = (status == BZ_IO_ERROR) ? 1 : 0;
+      BZ2_bzWriteClose (&status, bz, abandon, 0, 0);
+      if (status != BZ_OK)
+        throw std::runtime_error ("failed to close bzip2 stream");
+    }
+  };
+
+public:
+  static const constexpr char* extension = ".bz2";
+
+  static void
+  zip (const std::string& source_path, const std::string& dest_path)
+  {
+    bz2::zipper (source_path, dest_path).deflate ();
+  }
+
+};
+#endif // HAVE_BZL2
+
+// Note about zlib and gzip
+//
+// gzip is a format for compressed single files.  zlib is a format
+// designed for in-memory and communication channel applications.
+// gzip uses the same format internally for the compressed data but
+// has different headers and trailers.
+//
+// zlib is also a library but gzip is not.  Very old versions of zlib do
+// not include functions to create useful gzip headers and trailers:
+//
+//      Note that you cannot specify special gzip header contents (e.g.
+//      a file name or modification date), nor will inflate tell you what
+//      was in the gzip header. If you need to customize the header or
+//      see what's in it, you can use the raw deflate and inflate
+//      operations and the crc32() function and roll your own gzip
+//      encoding and decoding. Read the gzip RFC 1952 for details of the
+//      header and trailer format.
+//                                                          zlib FAQ
+//
+// Recent versions (on which we are already dependent) have deflateInit2()
+// to do it.  We still need to get the right metadata for the header
+// ourselves though.
+//
+// The header is defined in RFC #1952
+// GZIP file format specification version 4.3
+
+
+#ifdef HAVE_Z
+class gz
+{
+private:
+
+  // Util class to get a non-const char*
+  class uchar_array
+  {
+  public:
+    // Bytef is a typedef for unsigned char
+    unsigned char* p;
+
+    uchar_array (const std::string& str)
+    {
+      p = new Bytef[str.length () +1];
+      std::strcpy (reinterpret_cast<char*> (p), str.c_str ());
+    }
+
+    ~uchar_array (void) { delete[] p; }
+  };
+
+  // This is the really thing that needs to be 
+  class gzip_stream : public z_stream
+  {
+  public:
+    gzip_stream ()
+    {
+      zalloc = Z_NULL;
+      zfree = Z_NULL;
+      opaque = Z_NULL;
+    }
+
+    ~gzip_stream ()
+    {
+      int status = deflateEnd (this);
+      if (status != Z_OK)
+        throw std::runtime_error ("failed to close zlib stream");
+    }
+  };
+
+  class gzip_header : public gz_header
+  {
+  private:
+    uchar_array basename;
+
+  public:
+    gzip_header (const std::string& source_path)
+      : basename (octave::sys::env::base_pathname (source_path))
+    {
+      const octave::sys::file_stat source_stat (source_path);
+      if (! source_stat)
+        throw std::runtime_error ("unable to stat source file");
+
+      // time_t may be a signed int in which case it will be a
+      // positive number so it is safe to uLong.  Or is it?  Can
+      // unix_time really never be negative?
+      time = uLong (source_stat.mtime ().unix_time ());
+
+      //  If FNAME is set, an original file name is present,
+      //  terminated by a zero byte.  The name must consist of ISO
+      //  8859-1 (LATIN-1) characters; on operating systems using
+      //  EBCDIC or any other character set for file names, the name
+      //  must be translated to the ISO LATIN-1 character set.  This
+      //  is the original name of the file being compressed, with any
+      //  directory components removed, and, if the file being
+      //  compressed is on a file system with case insensitive names,
+      //  forced to lower case.
+      name = basename.p;
+
+      // If we don't set it to Z_NULL, then it will set FCOMMENT (4th bit)
+      // on the FLG byte, and then write {0, 3} comment.
+      comment = Z_NULL;
+
+      // Seems to already be the default but we are not taking chances.
+      extra = Z_NULL;
+
+      // We do not want a CRC for the header.  That would be only 2 more
+      // bytes, and maybe it would be a good thing but we want to generate
+      // gz files similar to the default gzip application.
+      hcrc = 0;
+
+      // OS (Operating System):
+      //      0 - FAT filesystem (MS-DOS, OS/2, NT/Win32)
+      //      1 - Amiga
+      //      2 - VMS (or OpenVMS)
+      //      3 - Unix
+      //      4 - VM/CMS
+      //      5 - Atari TOS
+      //      6 - HPFS filesystem (OS/2, NT)
+      //      7 - Macintosh
+      //      8 - Z-System
+      //      9 - CP/M
+      //     10 - TOPS-20
+      //     11 - NTFS filesystem (NT)
+      //     12 - QDOS
+      //     13 - Acorn RISCOS
+      //    255 - unknown
+      //
+      // The list is problematic because it mixes OS and filesystem.  It
+      // also does not specify whether filesystem relates to source or
+      // destination file.
+
+#if defined (__WIN32__)
+      os = 0; // or should it be 11?
+#elif defined (__APPLE__)
+      os = 7;
+#else // unix by default?
+      os = 3;
+#endif
+    }
+  };
+
+  class zipper
+  {
+  private:
+    CFile source;
+    CFile dest;
+    gzip_header header;
+    gzip_stream strm = gzip_stream ();
+
+  public:
+    zipper (const std::string& source_path, const std::string& dest_path)
+      : source (source_path, "rb"), dest (dest_path, "wb"),
+        header (source_path)
+    { }
+
+    void
+    deflate ()
+    {
+      // int deflateInit2 (z_streamp strm,
+      //                   int  level,      // compression level (default is 8)
+      //                   int  method,
+      //                   int  windowBits, // 15 (default) + 16 (gzip format)
+      //                   int  memLevel,   // memory usage (default is 8)
+      //                   int  strategy);
+
+      int status = deflateInit2 (&strm, 8, Z_DEFLATED, 31, 8,
+                                 Z_DEFAULT_STRATEGY);
+      if (status != Z_OK)
+        throw std::runtime_error ("failed to open zlib stream");
+
+      deflateSetHeader (&strm, &header);
+
+      const std::size_t buf_len = 8192;
+      unsigned char buf_in[buf_len];
+      unsigned char buf_out[buf_len];
+
+      while ((strm.avail_in = std::fread (buf_in, sizeof (buf_in[0]),
+                                          buf_len, source.fp)) != 0)
+        {
+          if (std::ferror (source.fp))
+            throw std::runtime_error ("failed to read source file");
+
+          strm.next_in = buf_in;
+          const int flush = std::feof (source.fp) ? Z_FINISH : Z_NO_FLUSH;
+
+          // If deflate returns Z_OK and with zero avail_out, it must be
+          // called again after making room in the output buffer because
+          // there might be more output pending.
+          do
+            {
+              strm.avail_out = buf_len;
+              strm.next_out = buf_out;
+              status = ::deflate (&strm, flush);
+              if (status == Z_STREAM_ERROR)
+                throw std::runtime_error ("failed to deflate");
+
+              std::fwrite (buf_out, sizeof (buf_out[0]),
+                           buf_len - strm.avail_out, dest.fp);
+              if (std::ferror (dest.fp))
+                throw std::runtime_error ("failed to write file");
+            }
+          while (strm.avail_out == 0);
+
+          if (strm.avail_in != 0)
+            throw std::runtime_error ("failed to wrote file");
+        }
+    }
+  };
+
+public:
+  static const constexpr char* extension = ".gz";
+
+  static void
+  zip (const std::string& source_path, const std::string& dest_path)
+  {
+    gz::zipper (source_path, dest_path).deflate ();
+  }
+};
+#endif // HAVE_Z
+
+
+template<typename X>
+string_vector
+xzip (const Array<std::string>& source_patterns,
+      const std::function<std::string(const std::string&)>& mk_dest_path)
+{
+  std::list<std::string> dest_paths;
+
+  std::function<void(const std::string&)> walk;
+  walk = [&walk, &mk_dest_path, &dest_paths] (const std::string& path) -> void
+  {
+    const octave::sys::file_stat fs (path);
+    // is_dir and is_reg will return false if failed to stat.
+    if (fs.is_dir ())
+      {
+        octave::sys::dir_entry dir (path);
+        if (dir)
+          {
+            // Collect the whole list of filenames first, before recursion
+            // to avoid issues with infinite loop if the action generates
+            // files in the same directory (highly likely).
+            string_vector dirlist = dir.read ();
+            for (octave_idx_type i = 0; i < dirlist.numel (); i++)
+              if (dirlist(i) != "." && dirlist(i) != "..")
+                walk (octave::sys::file_ops::concat (path, dirlist(i)));
+          }
+        // Note that we skip any problem with directories.
+      }
+    else if (fs.is_reg ())
+      {
+        const std::string dest_path = mk_dest_path (path);
+        try
+          {
+            X::zip (path, dest_path);
+          }
+        catch (...)
+          {
+            // Error "handling" is not including filename on the output list.
+            // Also remove created file which maybe was not even created
+            // in the first place.  Note that it is possible for the file
+            // to exist in the first place and for for X::zip to not have
+            // clobber it yet but we remove it anyway by design.
+            octave::sys::unlink (dest_path);
+            return;
+          }
+        dest_paths.push_front (dest_path);
+      }
+    // Skip all other file types and errors.
+    return;
+  };
+
+  for (octave_idx_type i = 0; i < source_patterns.numel (); i++)
+    {
+      const glob_match pattern (octave::sys::file_ops::tilde_expand (source_patterns(i)));
+      const string_vector filepaths = pattern.glob ();
+      for (octave_idx_type j = 0; j < filepaths.numel (); j++)
+        walk (filepaths(j));
+    }
+  return string_vector (dest_paths);
+}
+
+
+template<typename X>
+string_vector
+xzip (const Array<std::string>& source_patterns)
+{
+  const std::string ext = X::extension;
+  const std::function<std::string(const std::string&)> mk_dest_path
+    = [&ext] (const std::string& source_path) -> std::string
+  {
+    return source_path + ext;
+  };
+  return xzip<X> (source_patterns, mk_dest_path);
+}
+
+template<typename X>
+string_vector
+xzip (const Array<std::string>& source_patterns, const std::string& out_dir)
+{
+  const std::string ext = X::extension;
+  const std::function<std::string(const std::string&)> mk_dest_path
+    = [&out_dir, &ext] (const std::string& source_path) -> std::string
+  {
+    const std::string basename = octave::sys::env::base_pathname (source_path);
+    return octave::sys::file_ops::concat (out_dir, basename + ext);
+  };
+
+  // We don't care if mkdir fails.  Maybe it failed because it already
+  // exists, or maybe it can't bre created.  If the first, then there's
+  // nothing to do, if the later, then it will be handled later.  Any
+  // is to be handled by not listing files in the output.
+  octave::sys::mkdir (out_dir, 0777);
+  return xzip<X> (source_patterns, mk_dest_path);
+}
+
+template<typename X>
+static octave_value_list
+xzip (const std::string& func_name, const octave_value_list& args)
+{
+  const octave_idx_type nargin = args.length ();
+  if (nargin < 1 || nargin > 2)
+    print_usage ();
+
+  const Array<std::string> source_patterns
+    = args(0).xcellstr_value ("%s: FILES must be a character array or cellstr",
+                              func_name.c_str ());
+  if (nargin == 1)
+    return octave_value (Cell (xzip<X> (source_patterns)));
+  else // nargin == 2
+    {
+      const std::string out_dir = args(1).string_value ();
+      return octave_value (Cell (xzip<X> (source_patterns, out_dir)));
+    }
+}
+
+DEFUN_DLD (gzip, args, ,
+           doc: /* -*- texinfo -*-
+@deftypefn  {} {@var{filelist} =} gzip (@var{files})
+@deftypefnx {} {@var{filelist} =} gzip (@var{files}, @var{dir})
+Compress the list of files and directories specified in @var{files}.
+
+@var{files} is a character array or cell array of strings.  Shell wildcards
+in the filename such as @samp{*} or @samp{?} are accepted and expanded.
+Each file is compressed separately and a new file with a @file{".gz"}
+extension is created.  The original files are not modified, but existing
+compressed files will be silently overwritten.  If a directory is
+specified then @code{gzip} recursively compresses all files in the
+directory.
+
+If @var{dir} is defined the compressed files are placed in this directory,
+rather than the original directory where the uncompressed file resides.
+Note that this does not replicate a directory tree in @var{dir} which may
+lead to files overwritting each other if there are multiple files with the
+same name.
+
+If @var{dir} does not exist it is created.
+
+The optional output @var{filelist} is a list of the compressed files.
+@seealso{gunzip, unpack, bzip2, zip, tar}
+@end deftypefn */)
+{
+#ifndef HAVE_Z
+  err_disabled_feature ("gzip", "gzip");
+#else
+  return xzip<gz> ("gzip", args);
+#endif
+}
+
+/*
+%!error gzip ()
+%!error gzip ("1", "2", "3")
+%!error <FILES must be a character array or cellstr> gzip (1)
+*/
+
+DEFUN_DLD (bzip2, args, ,
+           doc: /* -*- texinfo -*-
+@deftypefn  {} {@var{filelist} =} bzip2 (@var{files})
+@deftypefnx {} {@var{filelist} =} bzip2 (@var{files}, @var{dir})
+Compress the list of files specified in @var{files}.
+
+@var{files} is a character array or cell array of strings.  Shell wildcards
+in the filename such as @samp{*} or @samp{?} are accepted and expanded.
+Each file is compressed separately and a new file with a @file{".bz2"}
+extension is created.  The original files are not modified, but existing
+compressed files will be silently overwritten.
+
+If @var{dir} is defined the compressed files are placed in this directory,
+rather than the original directory where the uncompressed file resides.
+Note that this does not replicate a directory tree in @var{dir} which may
+lead to files overwritting each other if there are multiple files with the
+same name.
+
+If @var{dir} does not exist it is created.
+
+The optional output @var{filelist} is a list of the compressed files.
+@seealso{bunzip2, unpack, gzip, zip, tar}
+@end deftypefn */)
+{
+#ifndef HAVE_BZ2
+  err_disabled_feature ("bzip2", "bzip2");
+#else
+  return xzip<bz2> ("bzip2", args);
+#endif
+}
+
+// Tests for both gzip/bzip2 and gunzip/bunzip2
+/*
+
+## Takes a single argument, a function handle for the test.  This other
+## function must accept two arguments, a directory for the tests, and
+## a cell array with zip function, unzip function, and file extension.
+
+%!function run_test_function (test_function)
+%!  enabled_zippers = cell (0, 0);
+%!  if (__octave_config_info__ ().build_features.BZ2)
+%!    enabled_zippers(1, end+1) = @bzip2;
+%!    enabled_zippers(2, end) = @bunzip2;
+%!    enabled_zippers(3, end) = ".bz2";
+%!  endif
+%!  if (__octave_config_info__ ().build_features.Z)
+%!    enabled_zippers(1, end+1) = @gzip;
+%!    enabled_zippers(2, end) = @gunzip;
+%!    enabled_zippers(3, end) = ".gz";
+%!  endif
+%!
+%!  for z = enabled_zippers
+%!    test_dir = tempname ();
+%!    if (! mkdir (test_dir))
+%!      error ("unable to create directory for tests");
+%!    endif
+%!    unwind_protect
+%!      test_function (test_dir, z)
+%!    unwind_protect_cleanup
+%!      confirm_recursive_rmdir (false, "local");
+%!      rmdir (test_dir, "s");
+%!    end_unwind_protect
+%!  endfor
+%!endfunction
+
+%!function create_file (fpath, data)
+%!  fid = fopen (fpath, "wb");
+%!  if (fid < 0)
+%!    error ("unable to open file for writing");
+%!  endif
+%!  if (fwrite (fid, data, class (data)) != numel (data))
+%!    error ("unable to write to file");
+%!  endif
+%!  if (fflush (fid) || fclose (fid))
+%!    error ("unable to flush or close file");
+%!  endif
+%!endfunction
+
+## Test with large files because of varied buffer size
+%!function test_large_file (test_dir, z)
+%!  test_file = tempname (test_dir);
+%!  create_file (test_file, rand (500000, 1));
+%!  md5 = hash ("md5", fileread (test_file));
+%!
+%!  expected_z_file = [test_file z{3}];
+%!  z_files = z{1} (test_file);
+%!  assert (z_files, {expected_z_file})
+%!
+%!  unlink (test_file);
+%!  assert (z{2} (z_files{1}), {test_file})
+%!  assert (hash ("md5", fileread (test_file)), md5)
+%!endfunction
+%!test run_test_function (@test_large_file)
+
+## Test that xzipped files are rexzipped
+%!function test_z_z (test_dir, z)
+%!  ori_file = tempname (test_dir);
+%!  create_file (ori_file, rand (100, 1));
+%!  md5_ori = hash ("md5", fileread (ori_file));
+%!
+%!  z_file = [ori_file z{3}];
+%!  filelist = z{1} (ori_file);
+%!  assert (filelist, {z_file}) # check output
+%!  assert (exist (z_file), 2) # confirm file exists
+%!  assert (exist (z_file), 2) # and did not remove original file
+%!  md5_z = hash ("md5", fileread (z_file));
+%!
+%!  unlink (ori_file);
+%!  assert (z{2} (z_file), {ori_file})
+%!  ## bug #48597
+%!  assert (exist (ori_file), 2) # bug #48597 (Xunzip should not remove file)
+%!  assert (hash ("md5", fileread (ori_file)), md5_ori)
+%!
+%!  ## xzip should dutifully re-xzip files even if they already are zipped
+%!  z_z_file = [z_file z{3}];
+%!
+%!  filelist = z{1} (z_file);
+%!  assert (filelist, {z_z_file}) # check output
+%!  assert (exist (z_z_file), 2) # confirm file exists
+%!  assert (exist (z_z_file), 2) # and did not remove original file
+%!
+%!  unlink (z_file);
+%!  assert (z{2} (z_z_file), {z_file})
+%!  assert (hash ("md5", fileread (z_file)), md5_z)
+%!endfunction
+%!test run_test_function (@test_z_z)
+
+%!function test_xzip_dir (test_dir, z)
+%!  fpaths = fullfile (test_dir, {"test1", "test2", "test3"});
+%!  z_files = strcat (fpaths, z{3});
+%!  md5s = cell (1, 3);
+%!  for idx = 1:numel(fpaths)
+%!    create_file (fpaths{idx}, rand (100, 1));
+%!    md5s(idx) = hash ("md5", fileread (fpaths{idx}));
+%!  endfor
+%!
+%!  assert (sort (z{1} ([test_dir filesep()])), z_files(:))
+%!  for idx = 1:numel(fpaths)
+%!    assert (exist (z_files{idx}), 2)
+%!    unlink (fpaths{idx});
+%!  endfor
+%!  for idx = 1:numel(fpaths)
+%!    assert (z{2} (z_files{idx}), fpaths{idx}); # bug #48598
+%!    assert (hash ("md5", fileread (fpaths{idx})), md5s{idx})
+%!  endfor
+%!endfunction
+%!test run_test_function (@test_xzip_dir)
+
+%!function test_save_to_dir (test_dir, z)
+%!  filename = "test-file";
+%!  filepath = fullfile (test_dir, filename);
+%!  create_file (filepath, rand (100, 1));
+%!  md5 = hash ("md5", fileread (filepath));
+%!
+%!  ## test with existing and non-existing directory
+%!  out_dirs = {tempname (test_dir), tempname (test_dir)};
+%!  if (! mkdir (out_dirs{1}))
+%!    error ("unable to create directory for test");
+%!  endif
+%!  for idx = 1:numel(out_dirs)
+%!    out_dir = out_dirs{idx};
+%!    z_file = fullfile (out_dir, [filename z{3}]);
+%!    assert (z{1} (filepath, out_dir), {z_file})
+%!    assert (exist (z_file, "file"), 2)
+%!    uz_file = z_file(1:(end-numel(z{3})));
+%!    assert (z{2} (z_file), uz_file); # bug #48598
+%!    assert (hash ("md5", fileread (uz_file)), md5)
+%!  endfor
+%!endfunction
+%!test run_test_function (@test_save_to_dir)
+*/
--- a/scripts/miscellaneous/bzip2.m	Fri Jul 22 12:08:51 2016 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,85 +0,0 @@
-## Copyright (C) 2008-2015 Thorsten Meyer
-## (based on gzip.m by David Bateman)
-##
-## This file is part of Octave.
-##
-## Octave is free software; you can redistribute it and/or modify it
-## under the terms of the GNU General Public License as published by
-## the Free Software Foundation; either version 3 of the License, or (at
-## your option) any later version.
-##
-## Octave is distributed in the hope that it will be useful, but
-## WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-## General Public License for more details.
-##
-## You should have received a copy of the GNU General Public License
-## along with Octave; see the file COPYING.  If not, see
-## <http://www.gnu.org/licenses/>.
-
-## -*- texinfo -*-
-## @deftypefn  {} {@var{filelist} =} bzip2 (@var{files})
-## @deftypefnx {} {@var{filelist} =} bzip2 (@var{files}, @var{dir})
-## Compress the list of files specified in @var{files}.
-##
-## @var{files} is a character array or cell array of strings.  Shell wildcards
-## in the filename such as @samp{*} or @samp{?} are accepted and expanded.
-## Each file is compressed separately and a new file with a @file{".bz2"}
-## extension is created.  The original files are not modified, but existing
-## compressed files will be silently overwritten.
-##
-## If @var{dir} is defined the compressed files are placed in this directory,
-## rather than the original directory where the uncompressed file resides.
-## If @var{dir} does not exist it is created.
-##
-## The optional output @var{filelist} is a list of the compressed files.
-## @seealso{bunzip2, unpack, gzip, zip, tar}
-## @end deftypefn
-
-function [varargout] = bzip2 (varargin)
-
-  if (nargin < 1 || nargin > 2 || nargout > 1)
-    print_usage ();
-  endif
-
-  [varargout{1:nargout}] = __xzip__ ("bzip2", "bz2", "bzip2 -- %s",
-                                     varargin{:});
-
-endfunction
-
-
-%!xtest
-%! ## test bzip2 together with bunzip2
-%! unwind_protect
-%!   filename = tempname;
-%!   dummy    = pi;
-%!   save (filename, "dummy");
-%!   dirname  = tempname;
-%!   mkdir (dirname);
-%!   filelist = bzip2 (filename, dirname);
-%!   filelist = filelist{1};
-%!   [~, basename, extension] = fileparts (filename);
-%!   if (! strcmp (filelist, [dirname, filesep, basename, extension, ".bz2"]))
-%!     error ("bzipped file does not match expected name!");
-%!   endif
-%!   if (! exist (filelist, "file"))
-%!     error ("bzipped file cannot be found!");
-%!   endif
-%!   bunzip2 (filelist);
-%!   fid = fopen (filename, "rb");
-%!   assert (fid >= 0);
-%!   orig_data = fread (fid);
-%!   fclose (fid);
-%!   fid = fopen ([dirname filesep basename extension], "rb");
-%!   assert (fid >= 0);
-%!   new_data = fread (fid);
-%!   fclose (fid);
-%!   if (orig_data != new_data)
-%!     error ("bunzipped file not equal to original file!");
-%!   endif
-%! unwind_protect_cleanup
-%!   delete (filename);
-%!   delete ([dirname, filesep, basename, extension]);
-%!   rmdir (dirname);
-%! end_unwind_protect
-
--- a/scripts/miscellaneous/gzip.m	Fri Jul 22 12:08:51 2016 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,90 +0,0 @@
-## Copyright (C) 2007-2015 David Bateman
-##
-## This file is part of Octave.
-##
-## Octave is free software; you can redistribute it and/or modify it
-## under the terms of the GNU General Public License as published by
-## the Free Software Foundation; either version 3 of the License, or (at
-## your option) any later version.
-##
-## Octave is distributed in the hope that it will be useful, but
-## WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-## General Public License for more details.
-##
-## You should have received a copy of the GNU General Public License
-## along with Octave; see the file COPYING.  If not, see
-## <http://www.gnu.org/licenses/>.
-
-## -*- texinfo -*-
-## @deftypefn  {} {@var{filelist} =} gzip (@var{files})
-## @deftypefnx {} {@var{filelist} =} gzip (@var{files}, @var{dir})
-## Compress the list of files and directories specified in @var{files}.
-##
-## @var{files} is a character array or cell array of strings.  Shell wildcards
-## in the filename such as @samp{*} or @samp{?} are accepted and expanded.
-## Each file is compressed separately and a new file with a @file{".gz"}
-## extension is created.  The original files are not modified, but existing
-## compressed files will be silently overwritten.  If a directory is
-## specified then @code{gzip} recursively compresses all files in the
-## directory.
-##
-## If @var{dir} is defined the compressed files are placed in this directory,
-## rather than the original directory where the uncompressed file resides.
-## If @var{dir} does not exist it is created.
-##
-## The optional output @var{filelist} is a list of the compressed files.
-## @seealso{gunzip, unpack, bzip2, zip, tar}
-## @end deftypefn
-
-function [varargout] = gzip (varargin)
-
-  if (nargin < 1 || nargin > 2 || nargout > 1)
-    print_usage ();
-  endif
-
-  [varargout{1:nargout}] = __xzip__ ("gzip", "gz", "gzip -r -- %s",
-                                     varargin{:});
-
-endfunction
-
-
-%!xtest
-%! ## test gzip together with gunzip
-%! unwind_protect
-%!   filename = tempname;
-%!   dummy    = pi;
-%!   save (filename, "dummy");
-%!   dirname  = tempname;
-%!   mkdir (dirname);
-%!   filelist = gzip (filename, dirname);
-%!   filelist = filelist{1};
-%!   [~, basename, extension] = fileparts (filename);
-%!   if (! strcmp (filelist, [dirname, filesep, basename, extension, ".gz"]))
-%!     error ("gzipped file does not match expected name!");
-%!   endif
-%!   if (! exist (filelist, "file"))
-%!     error ("gzipped file cannot be found!");
-%!   endif
-%!   gunzip (filelist);
-%!   fid = fopen (filename, "rb");
-%!   assert (fid >= 0);
-%!   orig_data = fread (fid);
-%!   fclose (fid);
-%!   fid = fopen ([dirname filesep basename extension], "rb");
-%!   assert (fid >= 0);
-%!   new_data = fread (fid);
-%!   fclose (fid);
-%!   if (orig_data != new_data)
-%!     error ("gunzipped file not equal to original file!");
-%!   endif
-%! unwind_protect_cleanup
-%!   delete (filename);
-%!   delete ([dirname, filesep, basename, extension]);
-%!   rmdir (dirname);
-%! end_unwind_protect
-
-%!error gzip ()
-%!error gzip ("1", "2", "3")
-%!error <FILES must be a character array or cellstr> gzip (1)
-
--- a/scripts/miscellaneous/module.mk	Fri Jul 22 12:08:51 2016 -0400
+++ b/scripts/miscellaneous/module.mk	Sun Jun 26 13:32:03 2016 +0200
@@ -4,13 +4,11 @@
 
 scripts_miscellaneous_PRIVATE_FCN_FILES = \
   scripts/miscellaneous/private/display_info_file.m \
-  scripts/miscellaneous/private/__w2mpth__.m \
-  scripts/miscellaneous/private/__xzip__.m
+  scripts/miscellaneous/private/__w2mpth__.m
 
 scripts_miscellaneous_FCN_FILES = \
   scripts/miscellaneous/bug_report.m \
   scripts/miscellaneous/bunzip2.m \
-  scripts/miscellaneous/bzip2.m \
   scripts/miscellaneous/cast.m \
   scripts/miscellaneous/citation.m \
   scripts/miscellaneous/compare_versions.m \
@@ -30,7 +28,6 @@
   scripts/miscellaneous/getappdata.m \
   scripts/miscellaneous/getfield.m \
   scripts/miscellaneous/gunzip.m \
-  scripts/miscellaneous/gzip.m \
   scripts/miscellaneous/info.m \
   scripts/miscellaneous/inputname.m \
   scripts/miscellaneous/isappdata.m \
--- a/scripts/miscellaneous/private/__xzip__.m	Fri Jul 22 12:08:51 2016 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,117 +0,0 @@
-## Copyright (C) 2008-2015 Thorsten Meyer
-## based on the original gzip function by David Bateman
-##
-## This file is part of Octave.
-##
-## Octave is free software; you can redistribute it and/or modify it
-## under the terms of the GNU General Public License as published by
-## the Free Software Foundation; either version 3 of the License, or (at
-## your option) any later version.
-##
-## Octave is distributed in the hope that it will be useful, but
-## WITHOUT ANY WARRANTY; without even the implied warranty of
-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-## General Public License for more details.
-##
-## You should have received a copy of the GNU General Public License
-## along with Octave; see the file COPYING.  If not, see
-## <http://www.gnu.org/licenses/>.
-
-## -*- texinfo -*-
-## @deftypefn {} {@var{filelist} =} __xzip__ (@var{commandname}, @var{extension}, @var{commandtemplate}, @var{files}, @var{outdir})
-## Undocumented internal function.
-## @end deftypefn
-
-## Compress the list of files and/or directories specified in @var{files}
-## with the external compression command @var{commandname}.  The template
-## @var{commandtemplate} is used to actually start the command.  Each file
-## is compressed separately and a new file with the extension @var{extension}
-## is created and placed into the directory @var{outdir}.  The original files
-## are not touched.  Existing compressed files are silently overwritten.
-## This is an internal function.  Do not use directly.
-
-function filelist = __xzip__ (commandname, extension, commandtemplate,
-                              files, outdir)
-
-  if (nargin == 5 && ! exist (outdir, "dir"))
-    r = mkdir (outdir);
-    if (! r)
-      error ("%s: Failed to create output directory DIR", commandname);
-    endif
-  endif
-
-  if (ischar (files))
-    files = cellstr (files);
-  elseif (! iscellstr (files))
-    error ("%s: FILES must be a character array or cellstr", commandname);
-  endif
-
-  if (nargin == 4)
-    outdir = tempname ();
-    r = mkdir (outdir);
-    if (! r)
-      error ("%s: Failed to create temporary output directory", commandname);
-    endif
-  endif
-
-  cwd = pwd ();
-  unwind_protect
-    files = glob (files);
-
-    ## Ignore any file with the compress extension
-    files(cellfun (@(x) (length (x) > length (extension)
-                         && strcmp (x((end - length (extension) + 1):end),
-                                    extension)),
-                   files)) = [];
-
-    copyfile (files, outdir);
-
-    fnames = fname_only (files);
-
-    cd (outdir);
-
-    cmd = sprintf (commandtemplate, sprintf (' "%s"', fnames{:}));
-
-    [status, output] = system (cmd);
-    if (status)
-      fcn = evalin ("caller", "mfilename ()");
-      error ("%s: %s command failed with exit status = %d",
-             fcn, commandname, status);
-    endif
-
-    if (nargin == 5)
-      if (nargout > 0)
-        ## FIXME: This doesn't work if a directory is compressed
-        filelist = cellfun (
-            @(x) fullfile (outdir, sprintf ("%s.%s", x, extension)),
-            fnames, "uniformoutput", false);
-      endif
-    else
-      ## FIXME: This does not work when you try to compress directories
-      ##        The resulting name is dir/.gz which is totally wrong.
-      ##        See bug #43431.
-      movefile (cellfun (@(x) sprintf ("%s.%s", x, extension),
-                         fnames, "uniformoutput", false), cwd);
-      if (nargout > 0)
-        filelist = cellfun (@(x) sprintf ("%s.%s", x, extension),
-                            files, "uniformoutput", false);
-      endif
-    endif
-
-  unwind_protect_cleanup
-    cd (cwd);
-    if (nargin == 4)
-      confirm_recursive_rmdir (false, "local");
-      rmdir (outdir, "s");
-    endif
-  end_unwind_protect
-
-endfunction
-
-function f = fname_only (files)
-  [~, f, ext] = cellfun ("fileparts", files, "uniformoutput", false);
-  f = cellfun (@(x, y) sprintf ("%s%s", x, y), f, ext, "uniformoutput", false);
-  idx = cellfun ("isdir", files);
-  f(idx) = files(idx);
-endfunction
-