view scripts/miscellaneous/unpack.m @ 18891:7bbe3658c5ef

maint: Use "FIXME:" coding convention in m-files. * flipdim.m, prepad.m, rotdim.m, doc.m, strread.m, textread.m, krylov.m, colon.m, dump_prefs.m, fileattrib.m, getappdata.m, __xzip__.m, unpack.m, fsolve.m, axis.m, meshc.m, print.m, __ghostscript__.m, __go_draw_axes__.m, __print_parse_opts__.m, struct2hdl.m, unique.m, spstats.m, treeplot.m, test.m, datestr.m: Use "FIXME:" coding convention in m-files.
author Rik <rik@octave.org>
date Wed, 25 Jun 2014 13:45:41 -0700
parents d63878346099
children 0ded8964c13a
line wrap: on
line source

## Copyright (C) 2006-2013 Bill Denney
##
## This file is part of Octave.
##
## Octave is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or (at
## your option) any later version.
##
## Octave is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Octave; see the file COPYING.  If not, see
## <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn  {Function File} {@var{files} =} unpack (@var{file})
## @deftypefnx {Function File} {@var{files} =} unpack (@var{file}, @var{dir})
## @deftypefnx {Function File} {@var{files} =} unpack (@var{file}, @var{dir}, @var{filetype})
## Unpack the archive @var{file} based on its extension to the directory
## @var{dir}.  If @var{file} is a list of strings, then each file is
## unpacked individually.  If @var{dir} is not specified, it defaults to
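## the current directory.  If a directory is in the file list, then the
## @var{filetype} must also be specified, and it must be "gunzip".  A
## @var{filetype} of "unzip" causes @var{file} to be treated as a zip
## archive regardless of its extension.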
##
## The optional return value is a list of @var{files} unpacked.
## @seealso{bzip2, gzip, zip, tar}
## @end deftypefn

## Author: Bill Denney <denney@seas.upenn.edu>

function filelist = unpack (file, dir = ".", filetype = "")
  ## Unpack the archive(s) named by FILE into the directory DIR.
  ##
  ## FILE     - string, char matrix (one name per row), or cellstr of
  ##            file names, directory names, or URLs (anything containing
  ##            "://" is downloaded with urlwrite first).
  ## DIR      - destination directory; created if it does not exist.
  ## FILETYPE - "gunzip" is required when FILE is a directory; "unzip"
  ##            forces zip handling regardless of the extension.
  ##
  ## Returns (only when requested) the list of unpacked files.  An
  ## unrecognized extension produces a warning and an empty list.
  ## Errors are raised for invalid input, a missing file, a failed mkdir,
  ## a non-zero exit status of the external tool, or a failed move.

  if (nargin < 1 || nargin > 3)
    print_usage ();
  endif

  if (! ischar (file) && ! iscellstr (file))
    error ("unpack: invalid input file class, %s", class (file));
  endif

  ## An empty string is not a vector, so the conversion below would turn
  ## it into {""} and recurse on "" forever.  Reject it explicitly.
  if (ischar (file) && isempty (file))
    error ("unpack: FILE must not be empty");
  endif

  ## character arrays of more than one string must be treated as cell strings
  if (ischar (file) && ! isvector (file))
    file = cellstr (file);
  endif

  ## Recursively unpack cellstr arrays one file at a time
  if (iscellstr (file))
    files = {};
    for i = 1:numel (file)
      ## Forward FILETYPE so list entries are treated exactly like a
      ## direct call (needed for directories and for "unzip").
      tmpfiles = unpack (file{i}, dir, filetype);
      files = {files{:} tmpfiles{:}};
    endfor

    ## Return output if requested.
    if (nargout > 0)
      filelist = files;
    endif

    return;
  endif

  if (isdir (file))
    if (isempty (filetype))
      error ("unpack: FILETYPE must be given for a directory");
    elseif (! any (strcmpi (filetype, "gunzip")))
      error ("unpack: FILETYPE must be gunzip for a directory");
    endif
    ext = ".gz";
  else
    [pathstr, name, ext] = fileparts (file);

    ## Check to see if it's .tar.gz, .tar.Z, etc.
    if (any (strcmpi ({".gz" ".Z" ".bz2" ".bz"}, ext)))
      [~, tmpname, tmpext] = fileparts (name);
      if (strcmpi (tmpext, ".tar"))
        name = tmpname;
        ext = [tmpext ext];
      endif
    endif

    ## If the file is a URL, download it and then work with that file.
    if (! isempty (strfind (file, "://")))
      ## FIXME: The above is not a perfect test for a URL
      urlfile = file;
      ## FIXME: Should we name the file that we download with the
      ##        same file name as the URL requests?
      tmpfile = [tmpnam() ext];
      [file, success, msg] = urlwrite (urlfile, tmpfile);
      if (! success)
        error ('unpack: could not get "%s": %s', urlfile, msg);
      endif
    endif

  endif

  ## canonicalize_file_name returns empty if the file isn't found, so
  ## use that to check for existence.
  cfile = canonicalize_file_name (file);

  if (isempty (cfile))
    error ('unpack: file "%s" not found', file);
  else
    file = cfile;
  endif

  ## Instructions on what to do for any extension.
  ##
  ## The field names are the lower-case file extension without periods.
  ## The first cell is what is executed to unpack an archive verbosely.
  ## The second cell is what is executed to unpack an archive quietly.
  ## The third cell is the function to execute on output to get the files list.
  ## The fourth cell indicates if the files may need to be manually moved
  ##   (i.e., tar and unzip decompress into the current directory while
  ##    bzip2 and gzip decompress the file at its location).
  persistent commandlist;
  if (isempty (commandlist))
    commandlist.gz = {'gzip -d -v -r "%s"', ...
                      'gzip -d -r "%s"', ...
                      @__parse_gzip__, true};
    commandlist.z = commandlist.gz;
    commandlist.bz2 = {'bzip2 -d -v "%s"', ...
                       'bzip2 -d "%s"', ...
                       @__parse_bzip2__, true};
    commandlist.bz = commandlist.bz2;
    commandlist.tar = {'tar xvf "%s"', ...
                       'tar xf "%s"', ...
                       @__parse_tar__, false};
    commandlist.targz = {'gzip -d -c "%s" | tar xvf -', ...
                         'gzip -d -c "%s" | tar xf -', ...
                         @__parse_tar__, false};
    commandlist.tgz = commandlist.targz;
    ## .tar.Z is detected above; gzip -d is used for it just as the
    ## plain .Z entry reuses the gzip commands.
    commandlist.tarz = commandlist.targz;
    commandlist.tarbz2 = {'bzip2 -d -c "%s" | tar xvf -', ...
                          'bzip2 -d -c "%s" | tar xf -', ...
                          @__parse_tar__, false};
    commandlist.tarbz = commandlist.tarbz2;
    commandlist.tbz2 = commandlist.tarbz2;
    commandlist.tbz = commandlist.tarbz2;
    commandlist.zip = {'unzip -n "%s"', ...
                       'unzip -nq "%s"', ...
                       @__parse_zip__, false};
  endif

  ## Unzip doesn't actually care about the extension
  if (strcmp (filetype, "unzip"))
    nodotext = "zip";
  else
    ## Extensions were matched case-insensitively above, but struct field
    ## lookup is case-sensitive, so normalize (".Z" -> "z").
    nodotext = lower (ext(ext != '.'));
  endif

  if (! isfield (commandlist, nodotext))
    warning ("unpack:filetype", "unrecognized file type, %s", ext);
    ## Nothing was unpacked; define the output so callers that asked for
    ## it (including the cellstr recursion above) do not fail.
    if (nargout > 0)
      filelist = {};
    endif
    return;
  endif

  [commandv, commandq, parser, move] = deal (commandlist.(nodotext){:});

  ## Create the directory if necessary.  This must happen before DIR is
  ## canonicalized below, because canonicalize_file_name returns empty
  ## for a path that does not exist.
  s = stat (dir);
  if (isempty (s))
    [status, msg] = mkdir (dir);
    if (! status)
      error ("unpack: mkdir failed to create %s: %s", dir, msg);
    endif
  elseif (! S_ISDIR (s.mode))
    error ("unpack: %s: not a directory", dir);
  endif

  origdir = pwd ();
  cstartdir = canonicalize_file_name (origdir);
  cenddir = canonicalize_file_name (dir);
  ## FIXME: This compares the original working directory with DIR, while
  ##        the note above says gzip/bzip2 decompress next to FILE.
  ##        Confirm whether the file's own directory should be used.
  needmove = move && ! strcmp (cstartdir, cenddir);

  ## The verbose command is needed whenever the tool output must be
  ## parsed, either to return the list or to know which files to move.
  if (nargout > 0 || needmove)
    command = commandv;
  else
    command = commandq;
  endif

  unwind_protect
    cd (dir);
    [status, output] = system (sprintf ([command " 2>&1"], file));
  unwind_protect_cleanup
    cd (origdir);
  end_unwind_protect

  if (status)
    error ("unpack: unarchiving program exited with status: %d\n%s",
           status, output);
  endif

  if (nargout > 0 || needmove)
    ## Trim the last newline if needed (guarding against empty output).
    ## FIXME: Will this need to change to a check for "\r\n" for windows?
    if (! isempty (output) && output(end) == "\n")
      output(end) = [];
    endif

    if (isempty (output))
      ## The tool printed nothing, so there is nothing to parse or move.
      files = {};
    else
      files = parser (ostrsplit (output, "\n"))';
    endif

    ## Move files if necessary
    if (needmove && ! isempty (files))
      [st, msg, ~] = movefile (files, dir);
      if (! st)
        error ('unpack: unable to move files to "%s": %s', dir, msg);
      endif

      ## Fix the names for the files since they were moved.
      for i = 1:numel (files)
        files{i} = strrep (files{i}, cstartdir, cenddir);
      endfor
    endif

    ## Return output if requested.
    if (nargout > 0)
      filelist = files;
    endif
  endif

endfunction

function files = __parse_zip__ (output)
  ## Parse the output from zip and unzip.
  ##
  ## OUTPUT is a cell array of strings, one per line printed by the tool.
  ## Returns a column cell array of file names; it is empty when OUTPUT
  ## holds nothing beyond the header line.

  ## Skip first line which is Archive header
  entries = output(2:end);
  if (isempty (entries))
    ## Without this guard cellstr would yield a single empty name.
    files = cell (0, 1);
    return;
  endif

  ## Build a padded char matrix so the constant width prefix (the first
  ## 13 columns) can be cut off every line at once; cellstr then drops
  ## the trailing padding again.
  files = char (entries);
  files = cellstr (files(:,14:end));
endfunction

function files = __parse_tar__ (output)
  ## The lines given in OUTPUT are handed back untouched.  This parser
  ## exists only so that every archive type has a parser to call.
  files = output;
endfunction

function files = __parse_gzip__ (output)
  ## Extract the names of the files compressed (or decompressed) from
  ## the output of gzip and gunzip.
  ##
  ## For every line that matches, only the text following the last
  ## " with " is kept; lines that do not match are returned as they are.

  pattern = '^.+ with (.*)$';
  keep = '$1';
  files = regexprep (output, pattern, keep);
endfunction

function files = __parse_bzip2__ (output)
  ## Parse the output from bzip2 and bunzip2 returning the files
  ## compressed (or decompressed).
  ##
  ## OUTPUT is a cell array of strings, one per line printed by the tool.
  ## Lines that do not match the expected pattern are returned unchanged.

  ## Strip leading blanks and the .bz2 or .bz extension from file name.
  ## The "2" is optional because files with a .bz extension are sent
  ## through this parser as well.
  files = regexprep (output, '^\s+(.*)\.bz2?: .*', '$1');
endfunction