changeset 19268:6ebc2ca01c76

unpack.m: Overhaul function. * unpack.m: Redo docstring. Match function variable name to documentation name. Correctly report error if file is not found and input is cellstr. Reword error() messages to be clearer. Allow case insensitive matching of extensions such as ".gz" or ".GZ". Fix huge problems with files not ending up in destination directory specified. Eliminate for loop and call strrep directly with cell array input. Add BIST tests.
author Rik <rik@octave.org>
date Tue, 07 Oct 2014 13:13:16 -0700
parents 0ded8964c13a
children cac9d4c49522
files scripts/miscellaneous/unpack.m
diffstat 1 files changed, 132 insertions(+), 49 deletions(-) [+]
line wrap: on
line diff
--- a/scripts/miscellaneous/unpack.m	Thu Sep 18 11:05:52 2014 +0200
+++ b/scripts/miscellaneous/unpack.m	Tue Oct 07 13:13:16 2014 -0700
@@ -21,10 +21,49 @@
 ## @deftypefnx {Function File} {@var{files} =} unpack (@var{file}, @var{dir})
 ## @deftypefnx {Function File} {@var{files} =} unpack (@var{file}, @var{dir}, @var{filetype})
 ## Unpack the archive @var{file} based on its extension to the directory
-## @var{dir}.  If @var{file} is a list of strings, then each file is
-## unpacked individually.  If @var{dir} is not specified, it defaults to
-## the current directory.  If a directory is in the file list, then the
-## @var{filetype} must also be specified.
+## @var{dir}.
+##
+## If @var{file} is a list of strings, then each file is unpacked
+## individually.  Shell wildcards in the filename such as @samp{*} or @samp{?}
+## are accepted and expanded.
+##
+## If @var{dir} is not specified or is empty (@code{[]}), it defaults to the
+## current directory.  If a directory is in the file list, then @var{filetype}
+## must also be specified.
+##
+## The specific archive filetype is inferred from the extension of the file.
+## The @var{filetype} may also be specified directly using a string which
+## corresponds to a known extension.
+##
+## Valid filetype extensions:
+##
+## @table @code
+## @item  bz
+## @itemx bz2
+## bzip archive
+##
+## @item gz
+## gzip archive
+##
+## @item tar
+## tar archive
+##
+## @item  tarbz
+## @itemxtarbz2
+## @itemx tbz
+## @itemx tbz2
+## tar + bzip archive
+##
+## @item  targz
+## @itemx tgz
+## tar + gzip archive
+##
+## @item z
+## compress archive
+##
+## @item zip
+## zip archive
+## @end table
 ##
 ## The optional return value is a list of @var{files} unpacked.
 ## @seealso{bzip2, gzip, zip, tar}
@@ -32,27 +71,31 @@
 
 ## Author: Bill Denney <denney@seas.upenn.edu>
 
-function filelist = unpack (filename, dir = ".", filetype = "")
+function filelist = unpack (file, dir = ".", filetype = "")
 
   if (nargin < 1 || nargin > 3)
     print_usage ();
   endif
 
-  if (! ischar (filename) && ! iscellstr (filename))
-    error ("unpack: invalid input file class, %s", class (filename));
+  if (! ischar (file) && ! iscellstr (file))
+    error ("unpack: FILE must be a string or cell array of strings");
   endif
 
-  ## character arrays of more than one string must be treated as cell strings
-  if (ischar (filename) && ! isvector (filename))
-    file = cellstr (filename);
-  else
-    file = glob (filename);
+  ## Convert char arrays to cell strings to simplify further processing
+  if (ischar (file))
+    file = cellstr (file);
+  endif
+  if (numel (file) == 1)
+    gfile = glob (file);
+    if (isempty (gfile))
+      error ('unpack: file "%s" not found', file{1});
+    else
+      file = gfile;
+    endif
   endif
 
   ## Recursively unpack cellstr arrays one file at a time
-  n_elem = numel (file);
-  if (n_elem > 1)
-    
+  if (numel (file) > 1)
     files = {};
     for i = 1:numel (file)
       tmpfiles = unpack (file{i}, dir);
@@ -63,18 +106,17 @@
     if (nargout > 0)
       filelist = files;
     endif
+    return;
 
-    return;
-    
   else
-    file = filename;
+    file = file{1};
   endif
 
   if (isdir (file))
     if (isempty (filetype))
       error ("unpack: FILETYPE must be given for a directory");
     elseif (! any (strcmpi (filetype, "gunzip")))
-      error ("unpack: FILETYPE must be gunzip for a directory");
+      error ('unpack: FILETYPE must be "gunzip" for a directory');
     endif
     ext = ".gz";
   else
@@ -91,28 +133,18 @@
 
     ## If the file is a URL, download it and then work with that file.
     if (! isempty (strfind (file, "://")))
-      ## FIXME: The above is not a perfect test for a URL
+      ## FIXME: The above code is not a perfect test for a URL
       urlfile = file;
-      ## FIXME: Should we name the file that we download with the
-      ##        same file name as the URL requests?
       tmpfile = [tmpnam() ext];
       [file, success, msg] = urlwrite (urlfile, tmpfile);
       if (! success)
-        error ('unpack: could not get "%s": %s', urlfile, msg);
+        error ('unpack: could not fetch "%s": %s', urlfile, msg);
       endif
     endif
 
   endif
 
-  ## canonicalize_file_name returns empty if the file isn't found, so
-  ## use that to check for existence.
-  cfile = canonicalize_file_name (file);
-
-  if (isempty (cfile))
-    error ('unpack: file "%s" not found', file);
-  else
-    file = cfile;
-  endif
+  file = canonicalize_file_name (file);
 
   ## Instructions on what to do for any extension.
   ##
@@ -152,17 +184,21 @@
   endif
 
   ## Unzip doesn't actually care about the extension
-  if (strcmp (filetype, "unzip"))
+  if (strcmpi (filetype, "unzip"))
     nodotext = "zip";
   else
     nodotext = ext(ext != '.');
   endif
 
-  origdir = pwd ();
-
-  if (isfield (commandlist, nodotext))
-    [commandv, commandq, parser, move] = deal (commandlist.(nodotext){:});
-    cstartdir = canonicalize_file_name (origdir);
+  if (isfield (commandlist, tolower (nodotext)))
+    [commandv, commandq, parsefcn, move] = deal (commandlist.(nodotext){:});
+    origdir = pwd ();
+    if (move)
+      startdir = fileparts (file); 
+    else
+      startdir = origdir;
+    endif
+    cstartdir = canonicalize_file_name (startdir);
     cenddir = canonicalize_file_name (dir);
     needmove = move && ! strcmp (cstartdir, cenddir);
     if (nargout > 0 || needmove)
@@ -171,7 +207,7 @@
       command = commandq;
     endif
   else
-    warning ("unpack:filetype", "unrecognized file type, %s", ext);
+    warning ("unpack: unrecognized FILETYPE <%s>", ext);
     files = file;
     return;
   endif
@@ -205,19 +241,17 @@
     if (output(end) == "\n")
       output(end) = [];
     endif
-    files = parser (ostrsplit (output, "\n"))';
+    files = parsefcn (ostrsplit (output, "\n"))';
 
-    ## Move files if necessary
+    ## Move files if necessary.
     if (needmove)
-      [st, msg, ~] = movefile (files, dir);
+      [st, msg] = movefile (files, cenddir);
       if (! st)
         error ('unpack: unable to move files to "%s": %s', dir, msg);
       endif
 
-      ## Fix the names for the files since they were moved.
-      for i = 1:numel (files)
-        files{i} = strrep (files{i}, cstartdir, cenddir);
-      endfor
+      ## Fix the names of the files since they were moved.
+      files = strrep (files, cstartdir, cenddir);
     endif
 
     ## Return output if requested.
@@ -231,9 +265,9 @@
 function files = __parse_zip__ (output)
   ## Parse the output from zip and unzip.
 
-  ## Skip first line which is Archive header
+  ## Skip first line which is Archive header.
   files = char (output(2:end));
-  ## Trim constant width prefix and return cell array
+  ## Trim constant width prefix and return cell array.
   files = cellstr (files(:,14:end))
 endfunction
 
@@ -254,4 +288,53 @@
 
   ## Strip leading blanks and .bz2 extension from file name
   files = regexprep (output, '^\s+(.*)\.bz2: .*', '$1');
-endfunction
\ No newline at end of file
+endfunction
+
+
+## FIXME: Maybe there should be a combined test script to test all of the
+##        various pack/unpack routines in the tests/ directory.
+%!test
+%! ## Create temporary file for packing and unpacking
+%! fname = tempname ();
+%! fid = fopen (fname, "wt");
+%! if (fid < 0)
+%!   error ("unpack: Unable to open temporary file for testing");
+%! endif
+%! fprintf (fid, "Hello World\n");
+%! fprintf (fid, "123 456 789\n");
+%! fclose (fid);
+%!
+%! unwind_protect
+%!   copyfile (fname, [fname ".orig"]);
+%!   gzip (fname, P_tmpdir);
+%!   filelist = unpack ([fname ".gz"], P_tmpdir);
+%!   assert (filelist{1}, fname); 
+%!   r = system (sprintf ("diff %s %s.orig", fname, fname));
+%!   if (r)
+%!     error ("unpack: Unpacked file does not equal original");
+%!   endif
+%! unwind_protect_cleanup
+%!   exist (fname) && unlink (fname);
+%!   unlink ([fname ".orig"]);
+%! end_unwind_protect
+
+## Test input validation
+%!error unpack ()
+%!error unpack (1,2,3,4)
+%!error <FILE must be a string or cell array of strings> unpack (1)
+%!error <file "_%NOT_A_FILENAME%_" not found> unpack ("_%NOT_A_FILENAME%_")
+%!error <file "_%NOT_A_FILENAME%_" not found> unpack ({"_%NOT_A_FILENAME%_"})
+%!error <file "_%NOT_A_FILENAME%_" not found> unpack ({"_%NOT_A_FILENAME%_", "2nd_filename"})
+%!error <FILETYPE must be given for a directory>
+%! if (isunix || ismac)
+%!   unpack ("/");
+%! else
+%!   unpack ('C:\');
+%! endif
+%!error <FILETYPE must be "gunzip" for a directory>
+%! if (isunix || ismac)
+%!   unpack ("/", [], "foobar");
+%! else
+%!   unpack ('C:\', [], "foobar");
+%! endif
+