view scripts/miscellaneous/unpack.m @ 31193:6bcc5e6d77fe stable

unpack.m: Escape backslashes in paths on Windows (bug #62888). * scripts/miscalleneous/unpack.m: Escape backslashes in paths on Windows. That seems to be necessary to correctly handle double backslashes that prefix UNC paths.
author Markus Mützel <markus.muetzel@gmx.de>
date Thu, 18 Aug 2022 08:40:57 +0200
parents 796f54d4ddbf
children c8ad083a5802
line wrap: on
line source

########################################################################
##
## Copyright (C) 2006-2022 The Octave Project Developers
##
## See the file COPYRIGHT.md in the top-level directory of this
## distribution or <https://octave.org/copyright/>.
##
## This file is part of Octave.
##
## Octave is free software: you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## Octave is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Octave; see the file COPYING.  If not, see
## <https://www.gnu.org/licenses/>.
##
########################################################################

## -*- texinfo -*-
## @deftypefn  {} {@var{files} =} unpack (@var{file})
## @deftypefnx {} {@var{files} =} unpack (@var{file}, @var{dir})
## @deftypefnx {} {@var{files} =} unpack (@var{file}, @var{dir}, @var{filetype})
## Unpack the archive @var{file} based on its extension to the directory
## @var{dir}.
##
## If @var{file} is a list of strings, then each file is unpacked
## individually.  Shell wildcards in the filename such as @samp{*} or
## @samp{?} are accepted and expanded.
##
## If @var{dir} is not specified or is empty (@code{[]}), it defaults to the
## current directory.  If a directory is in the file list, then @var{filetype}
## must also be specified.
##
## The specific archive filetype is inferred from the extension of the file.
## The @var{filetype} may also be specified directly using a string which
## corresponds to a known extension.
##
## Valid filetype extensions:
##
## @table @code
## @item  @nospell{bz}
## @itemx @nospell{bz2}
## bzip archive
##
## @item @nospell{gz}
## gzip archive
##
## @item tar
## tar archive
##
## @item  tarbz
## @itemx tarbz2
## @itemx tbz
## @itemx tbz2
## tar + bzip archive
##
## @item  targz
## @itemx tgz
## tar + gzip archive
##
## @item z
## compress archive
##
## @item zip
## zip archive
## @end table
##
## The optional return value is a list of @var{files} unpacked.
## @seealso{bunzip2, gunzip, unzip, untar, bzip2, gzip, zip, tar}
## @end deftypefn

function filelist = unpack (file, dir = [], filetype = "")

  if (nargin < 1)
    print_usage ();
  endif

  if (! ischar (file) && ! iscellstr (file))
    error ("unpack: FILE must be a string or cell array of strings");
  endif

  ## Convert char arrays to cell strings to simplify further processing
  if (ischar (file))
    file = cellstr (file);
  endif
  if (numel (file) == 1)
    ## FIXME: The code below is not a perfect test for a URL
    if (isempty (strfind (file{1}, "://")))
      if (ispc ())
        gfile = __wglob__ (file);
      else
        gfile = glob (file);
      endif
      if (isempty (gfile))
        error ('unpack: FILE "%s" not found', file{1});
      else
        file = gfile;
      endif
    endif
  endif

  ## Recursively unpack cellstr arrays one file at a time
  if (numel (file) > 1)
    files = {};
    for i = 1:numel (file)
      if (! isempty (dir))
        tmpfiles = unpack (file{i}, dir);
      else
        tmpfiles = unpack (file{i}, fileparts (file{i}));
      endif
      files = {files{:} tmpfiles{:}};
    endfor

    ## Return output if requested.
    if (nargout > 0)
      filelist = files;
    endif
    return;

  else
    file = file{1};
  endif

  if (nargin == 3 && (! ischar (filetype) || ! isrow (filetype)))
    error ("unpack: FILETYPE must be a string");
  endif

  if (isfolder (file))
    if (isempty (filetype))
      error ("unpack: FILETYPE must be given for a directory");
    elseif (! strcmpi (filetype, "gz"))
      error ('unpack: FILETYPE must be "gz" for a directory');
    endif
    ext = ".gz";
  else
    [pathstr, name, ext] = fileparts (file);

    if (nargin == 3 && ! strcmpi (ext, filetype))
      ## override extension with given filetype
      if (isempty (ext))
        ext = filetype;
      else
        ext = regexprep (ext, '(\.?)\S*$', ['$1' filetype]);
      endif
    endif

    ## Check to see if it's .tar.gz, .tar.Z, etc.
    if (any (strcmpi ({".gz" ".Z" ".bz2" ".bz"}, ext)))
      [~, tmpname, tmpext] = fileparts (name);
      if (strcmpi (tmpext, ".tar"))
        name = tmpname;
        ext = [tmpext ext];
      endif
    endif

    ## If the file is a URL, download it and then work with that file.
    if (! isempty (strfind (file, "://")))
      ## FIXME: The above code is not a perfect test for a URL
      urlfile = file;
      tmpfile = fullfile (tempdir (), [name ext]);
      [file, success, msg] = urlwrite (urlfile, tmpfile);
      if (! success)
        error ('unpack: could not fetch "%s": %s', urlfile, msg);
      endif
    endif

  endif

  file = make_absolute_filename (file);

  if (isempty (dir))
    dir = ".";
  else
    dir = tilde_expand (dir);
  endif

  ## Instructions on what to do for any extension.
  ##
  ## The field names are the file extension without periods.
  ## The first cell is what is executed to unpack an archive verbosely.
  ## The second cell is what is executed to unpack an archive quietly.
  ## The third cell is the function to execute on output to get the files list.
  ## The fourth cell indicates if the files may need to be manually moved
  ##   (i.e., tar and unzip decompress into the current directory while
  ##    bzip2 and gzip decompress the file at its location).
  persistent commandlist;
  if (isempty (commandlist))
    commandlist.gz = {'gzip -d -k -v -f -r "%s"', ...
                      'gzip -d -k -f -r "%s"', ...
                      @__parse_gzip__, true};
    commandlist.z = commandlist.gz;
    commandlist.bz2 = {'bzip2 -d -k -v -f "%s"', ...
                       'bzip2 -d -k -f "%s"', ...
                       @__parse_bzip2__, true};
    commandlist.bz = commandlist.bz2;
    commandlist.tar = {'tar xvf "%s"', ...
                       'tar xf "%s"', ...
                       @__parse_tar__, false};
    commandlist.targz = {'gzip -d -c "%s" | tar xvf -', ...
                         'gzip -d -c "%s" | tar xf -', ...
                         @__parse_tar__, false};
    commandlist.tgz = commandlist.targz;
    commandlist.tarbz2 = {'bzip2 -d -c "%s" | tar xvf -', ...
                          'bzip2 -d -c "%s" | tar xf -', ...
                          @__parse_tar__, false};
    commandlist.tarbz = commandlist.tarbz2;
    commandlist.tbz2 = commandlist.tarbz2;
    commandlist.tbz = commandlist.tarbz2;
    commandlist.zip = {'unzip -n "%s"', ...
                       'unzip -nq "%s"', ...
                       @__parse_zip__, false};
  endif

  ## Unzip doesn't actually care about the extension
  if (strcmpi (filetype, "zip"))
    nodotext = "zip";
  else
    nodotext = ext(ext != '.');
  endif

  if (ispc && strcmp (nodotext, "tar"))
    ## Change file pathname into a mingw style acceptable for tar
    file = __w2mpth__ (file);
  endif

  ## Create the output directory if necessary.
  s = stat (dir);
  if (isempty (s))
    [status, msg] = mkdir (dir);
    if (! status)
      error ("unpack: mkdir failed to create %s: %s", dir, msg);
    endif
  elseif (! S_ISDIR (s.mode))
    error ("unpack: %s: not a directory", dir);
  endif

  if (isfield (commandlist, tolower (nodotext)))
    [commandv, commandq, parsefcn, move] = deal (commandlist.(nodotext){:});
    origdir = pwd ();
    if (move)
      startdir = fileparts (file);
    else
      startdir = origdir;
    endif
    cstartdir = make_absolute_filename (startdir);
    cenddir = make_absolute_filename (dir);
    if (cenddir(end) == filesep)
      cenddir(end) = [];
    endif
    needmove = move && ! is_same_file (cstartdir, cenddir);
    if (nargout > 0 || needmove)
      command = commandv;
    else
      command = commandq;
    endif
  else
    warning ("unpack: unrecognized FILETYPE <%s>", nodotext);
    filelist = {};
    return;
  endif

  ## Save and restore the TAR_OPTIONS environment variable used by GNU tar.
  tar_options_env = getenv ("TAR_OPTIONS");
  unwind_protect
    unsetenv ("TAR_OPTIONS");
    cd (dir);
    if (ispc ())
      ## Escape backslashes (necessary for UNC paths).
      file = strrep (file, '\', '\\');
    endif
    [status, output] = system (sprintf ([command " 2>&1"], file));
  unwind_protect_cleanup
    cd (origdir);
    if (! isempty (tar_options_env))
      setenv ("TAR_OPTIONS", tar_options_env);
    endif
  end_unwind_protect

  if (status)
    error ("unpack: unarchiving program exited with status: %d\n%s",
           status, output);
  endif

  if (nargout > 0 || needmove)
    ## Trim the last CR or NL if needed.
    files = parsefcn (ostrsplit (output, "\r\n", true))';

    ## Move files if necessary.
    if (needmove)
      [st, msg] = movefile (files, cenddir);
      if (! st)
        error ('unpack: unable to move files to "%s": %s', dir, msg);
      endif

      ## Fix the names of the files since they were moved.
      files = strrep (files, cstartdir, cenddir);
    endif

    ## Return output if requested.
    if (nargout > 0)
      filelist = files;
    endif
  endif

endfunction

function files = __parse_zip__ (output)
  ## Parse the output from zip and unzip.

  ## Skip first line which is Archive header.
  files = char (output(2:end));
  ## Trim constant width prefix and return cell array.
  files = cellstr (files(:,14:end));
endfunction

function output = __parse_tar__ (output)
  ## BSD tar emits file actions in the first 2 columns

  if (tar_is_bsd ())
    output = cellfun (@(x) x(3:end), output, 'UniformOutput', false);
  endif
endfunction

function files = __parse_gzip__ (output)
  ## Parse the output from gzip and gunzip returning the files
  ## compressed (or decompressed).

  files = regexprep (output, '^.+ -- (?:created|replaced with) (.*)$', '$1');
endfunction

function files = __parse_bzip2__ (output)
  ## Parse the output from bzip2 and bunzip2 returning the files
  ## compressed (or decompressed).

  ## Strip leading blanks and .bz2 extension from filename
  files = regexprep (output, '^\s+(.*)\.bz2: .*', '$1');
endfunction


%!testif HAVE_ZLIB
%! envvar = {"TMPDIR", "TMP"};
%! envdir = cellfun (@(x) getenv (x), envvar, "uniformoutput", false);
%! unwind_protect
%!   cellfun (@(x) unsetenv (x), envvar);
%!   ## Create temporary directory and file for packing and unpacking
%!   dirname = tempname ();
%!   assert (mkdir (dirname));
%!   filename = tempname ();
%!   fid = fopen (filename, "wt");
%!   assert (fid >= 0);
%!   fprintf (fid, "Hello World\n");
%!   fprintf (fid, "123 456 789\n");
%!   fclose (fid);
%!
%!   unwind_protect
%!     copyfile (filename, [filename ".orig"]);
%!     gzip (filename, dirname);
%!     [~, f] = fileparts (filename);
%!     filelist = unpack (fullfile (dirname, [f ".gz"]), tempdir);
%!     assert (filelist{1}, filename);
%!     fid = fopen ([filename ".orig"], "rb");
%!     assert (fid >= 0);
%!     orig_data = fread (fid);
%!     fclose (fid);
%!     fid = fopen (filename, "rb");
%!     assert (fid >= 0);
%!     new_data = fread (fid);
%!     fclose (fid);
%!     if (orig_data != new_data)
%!       error ("unpack: Unpacked file does not equal original");
%!     endif
%!   unwind_protect_cleanup
%!     unlink (filename);
%!     unlink ([filename ".orig"]);
%!     confirm_recursive_rmdir (false, "local");
%!     sts = rmdir (dirname, "s");
%!   end_unwind_protect
%! unwind_protect_cleanup
%!   ## Restore environment variables TMPDIR, TMP
%!   for i = 1:numel (envvar)
%!     if (isempty (envdir{i}))
%!       unsetenv (envvar{i});
%!     else
%!       setenv (envvar{i}, envdir{i});
%!     endif
%!   endfor
%! end_unwind_protect

## Test input validation
%!error <Invalid call> unpack ()
%!error <FILE must be a string or cell array of strings> unpack (1)
%!error <FILE "_%NOT_A_FILENAME%_" not found> unpack ("_%NOT_A_FILENAME%_")
%!error <FILE "_%NOT_A_FILENAME%_" not found> unpack ({"_%NOT_A_FILENAME%_"})
%!error <FILE "_%NOT_A_FILENAME%_" not found> unpack ({"_%NOT_A_FILENAME%_", "2nd_filename"})
%!error <FILETYPE must be a string>
%! if (isunix || ismac)
%!   unpack ("/", [], 1)
%! else
%!   unpack ('C:\', [], 1)
%! endif
%!error <FILETYPE must be given for a directory>
%! if (isunix || ismac)
%!   unpack ("/");
%! else
%!   unpack ('C:\');
%! endif
%!error <FILETYPE must be "gz" for a directory>
%! if (isunix || ismac)
%!   unpack ("/", [], "foobar");
%! else
%!   unpack ('C:\', [], "foobar");
%! endif