comparison libinterp/dldfcn/gzip.cc @ 31607:aac27ad79be6 stable

maint: Re-indent code after switch to using namespace macros. * build-env.h, build-env.in.cc, Cell.h, __betainc__.cc, __eigs__.cc, __ftp__.cc, __ichol__.cc, __ilu__.cc, __isprimelarge__.cc, __magick_read__.cc, __pchip_deriv__.cc, amd.cc, base-text-renderer.cc, base-text-renderer.h, besselj.cc, bitfcns.cc, bsxfun.cc, c-file-ptr-stream.h, call-stack.cc, call-stack.h, ccolamd.cc, cellfun.cc, chol.cc, colamd.cc, dasrt.cc, data.cc, debug.cc, defaults.cc, defaults.h, det.cc, display.cc, display.h, dlmread.cc, dynamic-ld.cc, dynamic-ld.h, ellipj.cc, environment.cc, environment.h, error.cc, error.h, errwarn.h, event-manager.cc, event-manager.h, event-queue.cc, event-queue.h, fcn-info.cc, fcn-info.h, fft.cc, fft2.cc, file-io.cc, filter.cc, find.cc, ft-text-renderer.cc, ft-text-renderer.h, gcd.cc, gl-render.cc, gl-render.h, gl2ps-print.cc, gl2ps-print.h, graphics-toolkit.cc, graphics-toolkit.h, graphics.cc, gsvd.cc, gtk-manager.cc, gtk-manager.h, help.cc, help.h, hook-fcn.cc, hook-fcn.h, input.cc, input.h, interpreter-private.cc, interpreter-private.h, interpreter.cc, interpreter.h, inv.cc, jsondecode.cc, jsonencode.cc, latex-text-renderer.cc, latex-text-renderer.h, load-path.cc, load-path.h, load-save.cc, load-save.h, lookup.cc, ls-hdf5.cc, ls-mat4.cc, ls-mat5.cc, lsode.cc, lu.cc, mappers.cc, matrix_type.cc, max.cc, mex.cc, mexproto.h, mxarray.h, mxtypes.in.h, oct-errno.in.cc, oct-hdf5-types.cc, oct-hist.cc, oct-hist.h, oct-map.cc, oct-map.h, oct-opengl.h, oct-prcstrm.h, oct-process.cc, oct-process.h, oct-stdstrm.h, oct-stream.cc, oct-stream.h, oct-strstrm.h, octave-default-image.h, ordqz.cc, ordschur.cc, pager.cc, pager.h, pinv.cc, pow2.cc, pr-output.cc, psi.cc, qr.cc, quadcc.cc, rand.cc, regexp.cc, settings.cc, settings.h, sighandlers.cc, sighandlers.h, sparse-xpow.cc, sqrtm.cc, stack-frame.cc, stack-frame.h, stream-euler.cc, strfns.cc, svd.cc, syminfo.cc, syminfo.h, symrcm.cc, symrec.cc, symrec.h, symscope.cc, symscope.h, symtab.cc, symtab.h, sysdep.cc, sysdep.h, 
text-engine.cc, text-engine.h, text-renderer.cc, text-renderer.h, time.cc, toplev.cc, typecast.cc, url-handle-manager.cc, url-handle-manager.h, urlwrite.cc, utils.cc, utils.h, variables.cc, variables.h, xdiv.cc, __delaunayn__.cc, __init_fltk__.cc, __init_gnuplot__.cc, __ode15__.cc, __voronoi__.cc, audioread.cc, convhulln.cc, gzip.cc, cdef-class.cc, cdef-class.h, cdef-fwd.h, cdef-manager.cc, cdef-manager.h, cdef-method.cc, cdef-method.h, cdef-object.cc, cdef-object.h, cdef-package.cc, cdef-package.h, cdef-property.cc, cdef-property.h, cdef-utils.cc, cdef-utils.h, ov-base-diag.cc, ov-base-int.cc, ov-base-mat.cc, ov-base-mat.h, ov-base-scalar.cc, ov-base.cc, ov-base.h, ov-bool-mat.cc, ov-bool-mat.h, ov-bool-sparse.cc, ov-bool.cc, ov-builtin.h, ov-cell.cc, ov-ch-mat.cc, ov-class.cc, ov-class.h, ov-classdef.cc, ov-classdef.h, ov-complex.cc, ov-cx-diag.cc, ov-cx-mat.cc, ov-cx-sparse.cc, ov-dld-fcn.cc, ov-dld-fcn.h, ov-fcn-handle.cc, ov-fcn-handle.h, ov-fcn.h, ov-float.cc, ov-flt-complex.cc, ov-flt-cx-diag.cc, ov-flt-cx-mat.cc, ov-flt-re-diag.cc, ov-flt-re-mat.cc, ov-flt-re-mat.h, ov-intx.h, ov-java.cc, ov-lazy-idx.cc, ov-legacy-range.cc, ov-magic-int.cc, ov-mex-fcn.cc, ov-mex-fcn.h, ov-null-mat.cc, ov-perm.cc, ov-range.cc, ov-re-diag.cc, ov-re-mat.cc, ov-re-mat.h, ov-re-sparse.cc, ov-scalar.cc, ov-str-mat.cc, ov-struct.cc, ov-typeinfo.cc, ov-typeinfo.h, ov-usr-fcn.cc, ov-usr-fcn.h, ov.cc, ov.h, ovl.h, octave.cc, octave.h, op-b-sbm.cc, op-bm-sbm.cc, op-cs-scm.cc, op-fm-fcm.cc, op-fs-fcm.cc, op-s-scm.cc, op-scm-cs.cc, op-scm-s.cc, op-sm-cs.cc, ops.h, anon-fcn-validator.cc, anon-fcn-validator.h, bp-table.cc, bp-table.h, comment-list.cc, comment-list.h, filepos.h, lex.h, oct-lvalue.cc, oct-lvalue.h, parse.h, profiler.cc, profiler.h, pt-anon-scopes.cc, pt-anon-scopes.h, pt-arg-list.cc, pt-arg-list.h, pt-args-block.cc, pt-args-block.h, pt-array-list.cc, pt-array-list.h, pt-assign.cc, pt-assign.h, pt-binop.cc, pt-binop.h, pt-bp.cc, pt-bp.h, pt-cbinop.cc, pt-cbinop.h, 
pt-cell.cc, pt-cell.h, pt-check.cc, pt-check.h, pt-classdef.cc, pt-classdef.h, pt-cmd.h, pt-colon.cc, pt-colon.h, pt-const.cc, pt-const.h, pt-decl.cc, pt-decl.h, pt-eval.cc, pt-eval.h, pt-except.cc, pt-except.h, pt-exp.cc, pt-exp.h, pt-fcn-handle.cc, pt-fcn-handle.h, pt-id.cc, pt-id.h, pt-idx.cc, pt-idx.h, pt-jump.h, pt-loop.cc, pt-loop.h, pt-mat.cc, pt-mat.h, pt-misc.cc, pt-misc.h, pt-pr-code.cc, pt-pr-code.h, pt-select.cc, pt-select.h, pt-spmd.cc, pt-spmd.h, pt-stmt.cc, pt-stmt.h, pt-tm-const.cc, pt-tm-const.h, pt-unop.cc, pt-unop.h, pt-walk.cc, pt-walk.h, pt.cc, pt.h, token.cc, token.h, Range.cc, Range.h, idx-vector.cc, idx-vector.h, range-fwd.h, CollocWt.cc, CollocWt.h, aepbalance.cc, aepbalance.h, chol.cc, chol.h, gepbalance.cc, gepbalance.h, gsvd.cc, gsvd.h, hess.cc, hess.h, lo-mappers.cc, lo-mappers.h, lo-specfun.cc, lo-specfun.h, lu.cc, lu.h, oct-convn.cc, oct-convn.h, oct-fftw.cc, oct-fftw.h, oct-norm.cc, oct-norm.h, oct-rand.cc, oct-rand.h, oct-spparms.cc, oct-spparms.h, qr.cc, qr.h, qrp.cc, qrp.h, randgamma.cc, randgamma.h, randmtzig.cc, randmtzig.h, randpoisson.cc, randpoisson.h, schur.cc, schur.h, sparse-chol.cc, sparse-chol.h, sparse-lu.cc, sparse-lu.h, sparse-qr.cc, sparse-qr.h, svd.cc, svd.h, child-list.cc, child-list.h, dir-ops.cc, dir-ops.h, file-ops.cc, file-ops.h, file-stat.cc, file-stat.h, lo-sysdep.cc, lo-sysdep.h, lo-sysinfo.cc, lo-sysinfo.h, mach-info.cc, mach-info.h, oct-env.cc, oct-env.h, oct-group.cc, oct-group.h, oct-password.cc, oct-password.h, oct-syscalls.cc, oct-syscalls.h, oct-time.cc, oct-time.h, oct-uname.cc, oct-uname.h, action-container.cc, action-container.h, base-list.h, cmd-edit.cc, cmd-edit.h, cmd-hist.cc, cmd-hist.h, f77-fcn.h, file-info.cc, file-info.h, lo-array-errwarn.cc, lo-array-errwarn.h, lo-hash.cc, lo-hash.h, lo-ieee.h, lo-regexp.cc, lo-regexp.h, lo-utils.cc, lo-utils.h, oct-base64.cc, oct-base64.h, oct-glob.cc, oct-glob.h, oct-inttypes.h, oct-mutex.cc, oct-mutex.h, oct-refcount.h, oct-shlib.cc, oct-shlib.h, 
oct-sparse.cc, oct-sparse.h, oct-string.h, octave-preserve-stream-state.h, pathsearch.cc, pathsearch.h, quit.cc, quit.h, unwind-prot.cc, unwind-prot.h, url-transfer.cc, url-transfer.h: Re-indent code after switch to using namespace macros.
author Rik <rik@octave.org>
date Thu, 01 Dec 2022 18:02:15 -0800
parents e88a07dec498
children 597f3ee61a48
comparison
equal deleted inserted replaced
31605:e88a07dec498 31607:aac27ad79be6
83 # include <zlib.h> 83 # include <zlib.h>
84 #endif 84 #endif
85 85
86 OCTAVE_BEGIN_NAMESPACE(octave) 86 OCTAVE_BEGIN_NAMESPACE(octave)
87 87
88 //! RAII wrapper for std::FILE*. 88 //! RAII wrapper for std::FILE*.
89 //! 89 //!
90 //! If error handling is available for failing to close the file, use 90 //! If error handling is available for failing to close the file, use
91 //! the close method which throws. 91 //! the close method which throws.
92 //! 92 //!
93 //! If the file has been closed, fp is set to nullptr. Remember that 93 //! If the file has been closed, fp is set to nullptr. Remember that
94 //! behavior is undefined if the value of the pointer stream is used 94 //! behavior is undefined if the value of the pointer stream is used
95 //! after fclose. 95 //! after fclose.
96 96
97 class CFile 97 class CFile
98 {
99 public:
100
101 CFile (void) = delete;
102
103 CFile (const std::string& path, const std::string& mode)
104 : m_fp (sys::fopen (path, mode))
105 {
106 if (! m_fp)
107 throw std::runtime_error ("unable to open file");
108 }
109
110 CFile (const CFile&) = delete;
111
112 CFile& operator = (const CFile&) = delete;
113
114 ~CFile (void)
115 {
116 if (m_fp)
117 std::fclose (m_fp);
118 }
119
120 void close (void)
121 {
122 if (std::fclose (m_fp))
123 throw std::runtime_error ("unable to close file");
124
125 m_fp = nullptr;
126 }
127
128 std::FILE *m_fp;
129 };
130
131 #if defined (HAVE_BZ2)
132
133 class bz2
134 {
135 public:
136
137 static const constexpr char *extension = ".bz2";
138
139 static void zip (const std::string& source_path,
140 const std::string& dest_path)
141 {
142 bz2::zipper z (source_path, dest_path);
143 z.deflate ();
144 z.close ();
145 }
146
147 private:
148
149 class zipper
98 { 150 {
99 public: 151 public:
100 152
101 CFile (void) = delete; 153 zipper (void) = delete;
102 154
103 CFile (const std::string& path, const std::string& mode) 155 zipper (const std::string& source_path, const std::string& dest_path)
104 : m_fp (sys::fopen (path, mode)) 156 : m_status (BZ_OK), m_source (source_path, "rb"),
105 { 157 m_dest (dest_path, "wb"),
106 if (! m_fp) 158 m_bz (BZ2_bzWriteOpen (&m_status, m_dest.m_fp, 9, 0, 30))
107 throw std::runtime_error ("unable to open file"); 159 {
108 } 160 if (m_status != BZ_OK)
109 161 throw std::runtime_error ("failed to open bzip2 stream");
110 CFile (const CFile&) = delete; 162 }
111 163
112 CFile& operator = (const CFile&) = delete; 164 zipper (const zipper&) = delete;
113 165
114 ~CFile (void) 166 zipper& operator = (const zipper&) = delete;
115 { 167
116 if (m_fp) 168 ~zipper (void)
117 std::fclose (m_fp); 169 {
170 if (m_bz != nullptr)
171 BZ2_bzWriteClose (&m_status, m_bz, 1, nullptr, nullptr);
172 }
173
174 void deflate (void)
175 {
176 const std::size_t buf_len = 8192;
177 char buf[buf_len];
178 std::size_t n_read;
179 while ((n_read = std::fread (buf, sizeof (buf[0]), buf_len, m_source.m_fp)) != 0)
180 {
181 if (std::ferror (m_source.m_fp))
182 throw std::runtime_error ("failed to read from source file");
183 BZ2_bzWrite (&m_status, m_bz, buf, n_read);
184 if (m_status == BZ_IO_ERROR)
185 throw std::runtime_error ("failed to write or compress");
186 }
187 if (std::ferror (m_source.m_fp))
188 throw std::runtime_error ("failed to read from source file");
118 } 189 }
119 190
120 void close (void) 191 void close (void)
121 { 192 {
122 if (std::fclose (m_fp)) 193 int abandon = (m_status == BZ_IO_ERROR) ? 1 : 0;
123 throw std::runtime_error ("unable to close file"); 194 BZ2_bzWriteClose (&m_status, m_bz, abandon, nullptr, nullptr);
124 195 if (m_status != BZ_OK)
125 m_fp = nullptr; 196 throw std::runtime_error ("failed to close bzip2 stream");
126 } 197 m_bz = nullptr;
127 198
128 std::FILE *m_fp; 199 // We have no error handling for failing to close source, let
200 // the destructor close it.
201 m_dest.close ();
202 }
203
204 private:
205
206 int m_status;
207 CFile m_source;
208 CFile m_dest;
209 BZFILE *m_bz;
129 }; 210 };
130 211 };
131 #if defined (HAVE_BZ2) 212
132 213 #endif
133 class bz2 214
215 // Note about zlib and gzip
216 //
217 // gzip is a format for compressed single files. zlib is a format
218 // designed for in-memory and communication channel applications.
219 // gzip uses the same format internally for the compressed data but
220 // has different headers and trailers.
221 //
222 // zlib is also a library but gzip is not. Very old versions of zlib do
223 // not include functions to create useful gzip headers and trailers:
224 //
225 // Note that you cannot specify special gzip header contents (e.g.
226 // a file name or modification date), nor will inflate tell you what
227 // was in the gzip header. If you need to customize the header or
228 // see what's in it, you can use the raw deflate and inflate
229 // operations and the crc32() function and roll your own gzip
230 // encoding and decoding. Read the gzip RFC 1952 for details of the
231 // header and trailer format.
232 // zlib FAQ
233 //
234 // Recent versions (on which we are already dependent) have deflateInit2()
235 // to do it. We still need to get the right metadata for the header
236 // ourselves though.
237 //
238 // The header is defined in RFC #1952
239 // GZIP file format specification version 4.3
240
241
242 #if defined (HAVE_Z)
243
244 class gz
245 {
246 public:
247
248 static const constexpr char *extension = ".gz";
249
250 static void zip (const std::string& source_path,
251 const std::string& dest_path)
252 {
253 gz::zipper z (source_path, dest_path);
254 z.deflate ();
255 z.close ();
256 }
257
258 private:
259
260 // Util class to get a non-const char*
261 class uchar_array
134 { 262 {
135 public: 263 public:
136 264
137 static const constexpr char *extension = ".bz2"; 265 // Bytef is a typedef for unsigned char
138 266 unsigned char *p;
139 static void zip (const std::string& source_path, 267
140 const std::string& dest_path) 268 uchar_array (void) = delete;
141 { 269
142 bz2::zipper z (source_path, dest_path); 270 uchar_array (const std::string& str)
143 z.deflate (); 271 {
144 z.close (); 272 p = new Bytef[str.length () + 1];
145 } 273 std::strcpy (reinterpret_cast<char *> (p), str.c_str ());
274 }
275
276 uchar_array (const uchar_array&) = delete;
277
278 uchar_array& operator = (const uchar_array&) = delete;
279
280 ~uchar_array (void) { delete[] p; }
281 };
282
283 class gzip_header : public gz_header
284 {
285 public:
286
287 gzip_header (void) = delete;
288
289 gzip_header (const std::string& source_path)
290 : m_basename (sys::env::base_pathname (source_path))
291 {
292 const sys::file_stat source_stat (source_path);
293 if (! source_stat)
294 throw std::runtime_error ("unable to stat source file");
295
296 // time_t may be a signed int in which case it will be a
297 // positive number so it is safe to uLong. Or is it? Can
298 // unix_time really never be negative?
299 time = uLong (source_stat.mtime ().unix_time ());
300
301 // If FNAME is set, an original file name is present,
302 // terminated by a zero byte. The name must consist of ISO
303 // 8859-1 (LATIN-1) characters; on operating systems using
304 // EBCDIC or any other character set for file names, the name
305 // must be translated to the ISO LATIN-1 character set. This
306 // is the original name of the file being compressed, with any
307 // directory components removed, and, if the file being
308 // compressed is on a file system with case insensitive names,
309 // forced to lower case.
310 name = m_basename.p;
311
312 // If we don't set it to Z_NULL, then it will set FCOMMENT (4th bit)
313 // on the FLG byte, and then write {0, 3} comment.
314 comment = Z_NULL;
315
316 // Seems to already be the default but we are not taking chances.
317 extra = Z_NULL;
318
319 // We do not want a CRC for the header. That would be only 2 more
320 // bytes, and maybe it would be a good thing but we want to generate
321 // gz files similar to the default gzip application.
322 hcrc = 0;
323
324 // OS (Operating System):
325 // 0 - FAT filesystem (MS-DOS, OS/2, NT/Win32)
326 // 1 - Amiga
327 // 2 - VMS (or OpenVMS)
328 // 3 - Unix
329 // 4 - VM/CMS
330 // 5 - Atari TOS
331 // 6 - HPFS filesystem (OS/2, NT)
332 // 7 - Macintosh
333 // 8 - Z-System
334 // 9 - CP/M
335 // 10 - TOPS-20
336 // 11 - NTFS filesystem (NT)
337 // 12 - QDOS
338 // 13 - Acorn RISCOS
339 // 255 - unknown
340 //
341 // The list is problematic because it mixes OS and filesystem. It
342 // also does not specify whether filesystem relates to source or
343 // destination file.
344
345 #if defined (__WIN32__)
346 // Or should it be 11?
347 os = 0;
348 #elif defined (__APPLE__)
349 os = 7;
350 #else
351 // Unix by default?
352 os = 3;
353 #endif
354 }
355
356 gzip_header (const gzip_header&) = delete;
357
358 gzip_header& operator = (const gzip_header&) = delete;
359
360 ~gzip_header (void) = default;
146 361
147 private: 362 private:
148 363
149 class zipper 364 // This must be kept for gz_header.name
150 { 365 uchar_array m_basename;
151 public: 366 };
152 367
153 zipper (void) = delete; 368 class zipper
154 369 {
155 zipper (const std::string& source_path, const std::string& dest_path) 370 public:
156 : m_status (BZ_OK), m_source (source_path, "rb"), 371
157 m_dest (dest_path, "wb"), 372 zipper (void) = delete;
158 m_bz (BZ2_bzWriteOpen (&m_status, m_dest.m_fp, 9, 0, 30)) 373
374 zipper (const std::string& source_path, const std::string& dest_path)
375 : m_source (source_path, "rb"), m_dest (dest_path, "wb"),
376 m_header (source_path), m_strm (new z_stream)
377 {
378 m_strm->zalloc = Z_NULL;
379 m_strm->zfree = Z_NULL;
380 m_strm->opaque = Z_NULL;
381 }
382
383 zipper (const zipper&) = delete;
384
385 zipper& operator = (const zipper&) = delete;
386
387 ~zipper (void)
388 {
389 if (m_strm)
390 deflateEnd (m_strm);
391 delete m_strm;
392 }
393
394 void deflate (void)
395 {
396 // int deflateInit2 (z_streamp m_strm,
397 // int level, // compression level (default is 8)
398 // int method,
399 // int windowBits, // 15 (default) + 16 (gzip format)
400 // int memLevel, // memory usage (default is 8)
401 // int strategy);
402 int status = deflateInit2 (m_strm, 8, Z_DEFLATED, 31, 8,
403 Z_DEFAULT_STRATEGY);
404 if (status != Z_OK)
405 throw std::runtime_error ("failed to open zlib stream");
406
407 deflateSetHeader (m_strm, &m_header);
408
409 const std::size_t buf_len = 8192;
410 unsigned char buf_in[buf_len];
411 unsigned char buf_out[buf_len];
412
413 int flush;
414
415 do
416 {
417 m_strm->avail_in = std::fread (buf_in, sizeof (buf_in[0]),
418 buf_len, m_source.m_fp);
419
420 if (std::ferror (m_source.m_fp))
421 throw std::runtime_error ("failed to read source file");
422
423 m_strm->next_in = buf_in;
424 flush = (std::feof (m_source.m_fp) ? Z_FINISH : Z_NO_FLUSH);
425
426 // If deflate returns Z_OK and with zero avail_out, it must be
427 // called again after making room in the output buffer because
428 // there might be more output pending.
429 do
430 {
431 m_strm->avail_out = buf_len;
432 m_strm->next_out = buf_out;
433 status = ::deflate (m_strm, flush);
434 if (status == Z_STREAM_ERROR)
435 throw std::runtime_error ("failed to deflate");
436
437 std::fwrite (buf_out, sizeof (buf_out[0]),
438 buf_len - m_strm->avail_out, m_dest.m_fp);
439 if (std::ferror (m_dest.m_fp))
440 throw std::runtime_error ("failed to write file");
441 }
442 while (m_strm->avail_out == 0);
443
444 if (m_strm->avail_in != 0)
445 throw std::runtime_error ("failed to write file");
446
447 }
448 while (flush != Z_FINISH);
449
450 if (status != Z_STREAM_END)
451 throw std::runtime_error ("failed to write file");
452 }
453
454 void close (void)
455 {
456 if (deflateEnd (m_strm) != Z_OK)
457 throw std::runtime_error ("failed to close zlib stream");
458 m_strm = nullptr;
459
460 // We have no error handling for failing to close source, let
461 // the destructor close it.
462 m_dest.close ();
463 }
464
465 private:
466
467 CFile m_source;
468 CFile m_dest;
469 gzip_header m_header;
470 z_stream *m_strm;
471 };
472 };
473
474 #endif
475
476
477 template<typename X>
478 string_vector
479 xzip (const Array<std::string>& source_patterns,
480 const std::function<std::string(const std::string&)>& mk_dest_path)
481 {
482 std::list<std::string> dest_paths;
483
484 std::function<void(const std::string&)> walk;
485 walk = [&walk, &mk_dest_path, &dest_paths] (const std::string& path) -> void
486 {
487 const sys::file_stat fs (path);
488 // is_dir and is_reg will return false if failed to stat.
489 if (fs.is_dir ())
159 { 490 {
160 if (m_status != BZ_OK) 491 string_vector dirlist;
161 throw std::runtime_error ("failed to open bzip2 stream"); 492 std::string msg;
493
494 // Collect the whole list of filenames first, before recursion
495 // to avoid issues with infinite loop if the action generates
496 // files in the same directory (highly likely).
497 if (sys::get_dirlist (path, dirlist, msg))
498 {
499 for (octave_idx_type i = 0; i < dirlist.numel (); i++)
500 if (dirlist(i) != "." && dirlist(i) != "..")
501 walk (sys::file_ops::concat (path, dirlist(i)));
502 }
503 // Note that we skip any problem with directories.
162 } 504 }
163 505 else if (fs.is_reg ())
164 zipper (const zipper&) = delete;
165
166 zipper& operator = (const zipper&) = delete;
167
168 ~zipper (void)
169 { 506 {
170 if (m_bz != nullptr) 507 const std::string dest_path = mk_dest_path (path);
171 BZ2_bzWriteClose (&m_status, m_bz, 1, nullptr, nullptr); 508 try
509 {
510 X::zip (path, dest_path);
511 }
512 catch (const interrupt_exception&)
513 {
514 throw; // interrupts are special, just re-throw.
515 }
516 catch (...)
517 {
518 // Error "handling" is not including filename on the output list.
519 // Also, remove created file which may not have been created
520 // in the first place. Note that it is possible for the file
521 // to exist before the call to X::zip and that X::zip has not
522 // clobber it yet, but we remove it anyway.
523 sys::unlink (dest_path);
524 return;
525 }
526 dest_paths.push_front (dest_path);
172 } 527 }
173 528 // Skip all other file types and errors.
174 void deflate (void) 529 return;
175 {
176 const std::size_t buf_len = 8192;
177 char buf[buf_len];
178 std::size_t n_read;
179 while ((n_read = std::fread (buf, sizeof (buf[0]), buf_len, m_source.m_fp)) != 0)
180 {
181 if (std::ferror (m_source.m_fp))
182 throw std::runtime_error ("failed to read from source file");
183 BZ2_bzWrite (&m_status, m_bz, buf, n_read);
184 if (m_status == BZ_IO_ERROR)
185 throw std::runtime_error ("failed to write or compress");
186 }
187 if (std::ferror (m_source.m_fp))
188 throw std::runtime_error ("failed to read from source file");
189 }
190
191 void close (void)
192 {
193 int abandon = (m_status == BZ_IO_ERROR) ? 1 : 0;
194 BZ2_bzWriteClose (&m_status, m_bz, abandon, nullptr, nullptr);
195 if (m_status != BZ_OK)
196 throw std::runtime_error ("failed to close bzip2 stream");
197 m_bz = nullptr;
198
199 // We have no error handling for failing to close source, let
200 // the destructor close it.
201 m_dest.close ();
202 }
203
204 private:
205
206 int m_status;
207 CFile m_source;
208 CFile m_dest;
209 BZFILE *m_bz;
210 };
211 }; 530 };
212 531
213 #endif 532 for (octave_idx_type i = 0; i < source_patterns.numel (); i++)
214 533 {
215 // Note about zlib and gzip 534 const glob_match pattern (sys::file_ops::tilde_expand (source_patterns(i)));
216 // 535 const string_vector filepaths = pattern.glob ();
217 // gzip is a format for compressed single files. zlib is a format 536 for (octave_idx_type j = 0; j < filepaths.numel (); j++)
218 // designed for in-memory and communication channel applications. 537 walk (filepaths(j));
219 // gzip uses the same format internally for the compressed data but 538 }
220 // has different headers and trailers. 539 return string_vector (dest_paths);
221 // 540 }
222 // zlib is also a library but gzip is not. Very old versions of zlib do 541
223 // not include functions to create useful gzip headers and trailers: 542
224 // 543 template<typename X>
225 // Note that you cannot specify special gzip header contents (e.g. 544 string_vector
226 // a file name or modification date), nor will inflate tell you what 545 xzip (const Array<std::string>& source_patterns)
227 // was in the gzip header. If you need to customize the header or 546 {
228 // see what's in it, you can use the raw deflate and inflate 547 const std::string ext = X::extension;
229 // operations and the crc32() function and roll your own gzip 548 const std::function<std::string(const std::string&)> mk_dest_path
230 // encoding and decoding. Read the gzip RFC 1952 for details of the 549 = [&ext] (const std::string& source_path) -> std::string
231 // header and trailer format. 550 {
232 // zlib FAQ 551 return source_path + ext;
233 //
234 // Recent versions (on which we are already dependent) have deflateInit2()
235 // to do it. We still need to get the right metadata for the header
236 // ourselves though.
237 //
238 // The header is defined in RFC #1952
239 // GZIP file format specification version 4.3
240
241
242 #if defined (HAVE_Z)
243
244 class gz
245 {
246 public:
247
248 static const constexpr char *extension = ".gz";
249
250 static void zip (const std::string& source_path,
251 const std::string& dest_path)
252 {
253 gz::zipper z (source_path, dest_path);
254 z.deflate ();
255 z.close ();
256 }
257
258 private:
259
260 // Util class to get a non-const char*
261 class uchar_array
262 {
263 public:
264
265 // Bytef is a typedef for unsigned char
266 unsigned char *p;
267
268 uchar_array (void) = delete;
269
270 uchar_array (const std::string& str)
271 {
272 p = new Bytef[str.length () + 1];
273 std::strcpy (reinterpret_cast<char *> (p), str.c_str ());
274 }
275
276 uchar_array (const uchar_array&) = delete;
277
278 uchar_array& operator = (const uchar_array&) = delete;
279
280 ~uchar_array (void) { delete[] p; }
281 };
282
283 class gzip_header : public gz_header
284 {
285 public:
286
287 gzip_header (void) = delete;
288
289 gzip_header (const std::string& source_path)
290 : m_basename (sys::env::base_pathname (source_path))
291 {
292 const sys::file_stat source_stat (source_path);
293 if (! source_stat)
294 throw std::runtime_error ("unable to stat source file");
295
296 // time_t may be a signed int in which case it will be a
297 // positive number so it is safe to uLong. Or is it? Can
298 // unix_time really never be negative?
299 time = uLong (source_stat.mtime ().unix_time ());
300
301 // If FNAME is set, an original file name is present,
302 // terminated by a zero byte. The name must consist of ISO
303 // 8859-1 (LATIN-1) characters; on operating systems using
304 // EBCDIC or any other character set for file names, the name
305 // must be translated to the ISO LATIN-1 character set. This
306 // is the original name of the file being compressed, with any
307 // directory components removed, and, if the file being
308 // compressed is on a file system with case insensitive names,
309 // forced to lower case.
310 name = m_basename.p;
311
312 // If we don't set it to Z_NULL, then it will set FCOMMENT (4th bit)
313 // on the FLG byte, and then write {0, 3} comment.
314 comment = Z_NULL;
315
316 // Seems to already be the default but we are not taking chances.
317 extra = Z_NULL;
318
319 // We do not want a CRC for the header. That would be only 2 more
320 // bytes, and maybe it would be a good thing but we want to generate
321 // gz files similar to the default gzip application.
322 hcrc = 0;
323
324 // OS (Operating System):
325 // 0 - FAT filesystem (MS-DOS, OS/2, NT/Win32)
326 // 1 - Amiga
327 // 2 - VMS (or OpenVMS)
328 // 3 - Unix
329 // 4 - VM/CMS
330 // 5 - Atari TOS
331 // 6 - HPFS filesystem (OS/2, NT)
332 // 7 - Macintosh
333 // 8 - Z-System
334 // 9 - CP/M
335 // 10 - TOPS-20
336 // 11 - NTFS filesystem (NT)
337 // 12 - QDOS
338 // 13 - Acorn RISCOS
339 // 255 - unknown
340 //
341 // The list is problematic because it mixes OS and filesystem. It
342 // also does not specify whether filesystem relates to source or
343 // destination file.
344
345 #if defined (__WIN32__)
346 // Or should it be 11?
347 os = 0;
348 #elif defined (__APPLE__)
349 os = 7;
350 #else
351 // Unix by default?
352 os = 3;
353 #endif
354 }
355
356 gzip_header (const gzip_header&) = delete;
357
358 gzip_header& operator = (const gzip_header&) = delete;
359
360 ~gzip_header (void) = default;
361
362 private:
363
364 // This must be kept for gz_header.name
365 uchar_array m_basename;
366 };
367
368 class zipper
369 {
370 public:
371
372 zipper (void) = delete;
373
374 zipper (const std::string& source_path, const std::string& dest_path)
375 : m_source (source_path, "rb"), m_dest (dest_path, "wb"),
376 m_header (source_path), m_strm (new z_stream)
377 {
378 m_strm->zalloc = Z_NULL;
379 m_strm->zfree = Z_NULL;
380 m_strm->opaque = Z_NULL;
381 }
382
383 zipper (const zipper&) = delete;
384
385 zipper& operator = (const zipper&) = delete;
386
387 ~zipper (void)
388 {
389 if (m_strm)
390 deflateEnd (m_strm);
391 delete m_strm;
392 }
393
394 void deflate (void)
395 {
396 // int deflateInit2 (z_streamp m_strm,
397 // int level, // compression level (default is 8)
398 // int method,
399 // int windowBits, // 15 (default) + 16 (gzip format)
400 // int memLevel, // memory usage (default is 8)
401 // int strategy);
402 int status = deflateInit2 (m_strm, 8, Z_DEFLATED, 31, 8,
403 Z_DEFAULT_STRATEGY);
404 if (status != Z_OK)
405 throw std::runtime_error ("failed to open zlib stream");
406
407 deflateSetHeader (m_strm, &m_header);
408
409 const std::size_t buf_len = 8192;
410 unsigned char buf_in[buf_len];
411 unsigned char buf_out[buf_len];
412
413 int flush;
414
415 do
416 {
417 m_strm->avail_in = std::fread (buf_in, sizeof (buf_in[0]),
418 buf_len, m_source.m_fp);
419
420 if (std::ferror (m_source.m_fp))
421 throw std::runtime_error ("failed to read source file");
422
423 m_strm->next_in = buf_in;
424 flush = (std::feof (m_source.m_fp) ? Z_FINISH : Z_NO_FLUSH);
425
426 // If deflate returns Z_OK and with zero avail_out, it must be
427 // called again after making room in the output buffer because
428 // there might be more output pending.
429 do
430 {
431 m_strm->avail_out = buf_len;
432 m_strm->next_out = buf_out;
433 status = ::deflate (m_strm, flush);
434 if (status == Z_STREAM_ERROR)
435 throw std::runtime_error ("failed to deflate");
436
437 std::fwrite (buf_out, sizeof (buf_out[0]),
438 buf_len - m_strm->avail_out, m_dest.m_fp);
439 if (std::ferror (m_dest.m_fp))
440 throw std::runtime_error ("failed to write file");
441 }
442 while (m_strm->avail_out == 0);
443
444 if (m_strm->avail_in != 0)
445 throw std::runtime_error ("failed to write file");
446
447 } while (flush != Z_FINISH);
448
449 if (status != Z_STREAM_END)
450 throw std::runtime_error ("failed to write file");
451 }
452
453 void close (void)
454 {
455 if (deflateEnd (m_strm) != Z_OK)
456 throw std::runtime_error ("failed to close zlib stream");
457 m_strm = nullptr;
458
459 // We have no error handling for failing to close source, let
460 // the destructor close it.
461 m_dest.close ();
462 }
463
464 private:
465
466 CFile m_source;
467 CFile m_dest;
468 gzip_header m_header;
469 z_stream *m_strm;
470 };
471 }; 552 };
472 553 return xzip<X> (source_patterns, mk_dest_path);
473 #endif 554 }
474 555
475 556 template<typename X>
476 template<typename X> 557 string_vector
477 string_vector 558 xzip (const Array<std::string>& source_patterns, const std::string& out_dir)
478 xzip (const Array<std::string>& source_patterns, 559 {
479 const std::function<std::string(const std::string&)>& mk_dest_path) 560 const std::string ext = X::extension;
480 { 561 const std::function<std::string(const std::string&)> mk_dest_path
481 std::list<std::string> dest_paths; 562 = [&out_dir, &ext] (const std::string& source_path) -> std::string
482 563 {
483 std::function<void(const std::string&)> walk; 564 // Strip any relative path (bug #58547)
484 walk = [&walk, &mk_dest_path, &dest_paths] (const std::string& path) -> void 565 std::size_t pos = source_path.find_last_of (sys::file_ops::dir_sep_str ());
485 { 566 const std::string basename =
486 const sys::file_stat fs (path); 567 (pos == std::string::npos ? source_path : source_path.substr (pos+1));
487 // is_dir and is_reg will return false if failed to stat. 568 return sys::file_ops::concat (out_dir, basename + ext);
488 if (fs.is_dir ()) 569 };
489 { 570
490 string_vector dirlist; 571 // We don't care if mkdir fails. Maybe it failed because it already
491 std::string msg; 572 // exists, or maybe it can't be created. If the first, then there's
492 573 // nothing to do, if the later, then it will be handled later. Any
493 // Collect the whole list of filenames first, before recursion 574 // is to be handled by not listing files in the output.
494 // to avoid issues with infinite loop if the action generates 575 sys::mkdir (out_dir, 0777);
495 // files in the same directory (highly likely). 576 return xzip<X> (source_patterns, mk_dest_path);
496 if (sys::get_dirlist (path, dirlist, msg)) 577 }
497 { 578
498 for (octave_idx_type i = 0; i < dirlist.numel (); i++) 579 template<typename X>
499 if (dirlist(i) != "." && dirlist(i) != "..") 580 static octave_value_list
500 walk (sys::file_ops::concat (path, dirlist(i))); 581 xzip (const std::string& fcn_name, const octave_value_list& args)
501 } 582 {
502 // Note that we skip any problem with directories. 583 const octave_idx_type nargin = args.length ();
503 } 584 if (nargin < 1 || nargin > 2)
504 else if (fs.is_reg ()) 585 print_usage ();
505 { 586
506 const std::string dest_path = mk_dest_path (path); 587 const Array<std::string> source_patterns
507 try 588 = args(0).xcellstr_value ("%s: FILES must be a character array or cellstr",
508 { 589 fcn_name.c_str ());
509 X::zip (path, dest_path); 590 if (nargin == 1)
510 } 591 return octave_value (Cell (xzip<X> (source_patterns)));
511 catch (const interrupt_exception&) 592 else // nargin == 2
512 { 593 {
513 throw; // interrupts are special, just re-throw. 594 const std::string out_dir = args(1).string_value ();
514 } 595 return octave_value (Cell (xzip<X> (source_patterns, out_dir)));
515 catch (...) 596 }
516 { 597 }
517 // Error "handling" is not including filename on the output list.
518 // Also, remove created file which may not have been created
519 // in the first place. Note that it is possible for the file
520 // to exist before the call to X::zip and that X::zip has not
521 // clobber it yet, but we remove it anyway.
522 sys::unlink (dest_path);
523 return;
524 }
525 dest_paths.push_front (dest_path);
526 }
527 // Skip all other file types and errors.
528 return;
529 };
530
531 for (octave_idx_type i = 0; i < source_patterns.numel (); i++)
532 {
533 const glob_match pattern (sys::file_ops::tilde_expand (source_patterns(i)));
534 const string_vector filepaths = pattern.glob ();
535 for (octave_idx_type j = 0; j < filepaths.numel (); j++)
536 walk (filepaths(j));
537 }
538 return string_vector (dest_paths);
539 }
540
541
542 template<typename X>
543 string_vector
544 xzip (const Array<std::string>& source_patterns)
545 {
546 const std::string ext = X::extension;
547 const std::function<std::string(const std::string&)> mk_dest_path
548 = [&ext] (const std::string& source_path) -> std::string
549 {
550 return source_path + ext;
551 };
552 return xzip<X> (source_patterns, mk_dest_path);
553 }
554
555 template<typename X>
556 string_vector
557 xzip (const Array<std::string>& source_patterns, const std::string& out_dir)
558 {
559 const std::string ext = X::extension;
560 const std::function<std::string(const std::string&)> mk_dest_path
561 = [&out_dir, &ext] (const std::string& source_path) -> std::string
562 {
563 // Strip any relative path (bug #58547)
564 std::size_t pos = source_path.find_last_of (sys::file_ops::dir_sep_str ());
565 const std::string basename =
566 (pos == std::string::npos ? source_path : source_path.substr (pos+1));
567 return sys::file_ops::concat (out_dir, basename + ext);
568 };
569
570 // We don't care if mkdir fails. Maybe it failed because it already
571 // exists, or maybe it can't be created. If the first, then there's
572 // nothing to do, if the later, then it will be handled later. Any
573 // is to be handled by not listing files in the output.
574 sys::mkdir (out_dir, 0777);
575 return xzip<X> (source_patterns, mk_dest_path);
576 }
577
578 template<typename X>
579 static octave_value_list
580 xzip (const std::string& fcn_name, const octave_value_list& args)
581 {
582 const octave_idx_type nargin = args.length ();
583 if (nargin < 1 || nargin > 2)
584 print_usage ();
585
586 const Array<std::string> source_patterns
587 = args(0).xcellstr_value ("%s: FILES must be a character array or cellstr",
588 fcn_name.c_str ());
589 if (nargin == 1)
590 return octave_value (Cell (xzip<X> (source_patterns)));
591 else // nargin == 2
592 {
593 const std::string out_dir = args(1).string_value ();
594 return octave_value (Cell (xzip<X> (source_patterns, out_dir)));
595 }
596 }
597 598
598 DEFUN_DLD (gzip, args, nargout, 599 DEFUN_DLD (gzip, args, nargout,
599 doc: /* -*- texinfo -*- 600 doc: /* -*- texinfo -*-
600 @deftypefn {} {@var{filelist} =} gzip (@var{files}) 601 @deftypefn {} {@var{filelist} =} gzip (@var{files})
601 @deftypefnx {} {@var{filelist} =} gzip (@var{files}, @var{dir}) 602 @deftypefnx {} {@var{filelist} =} gzip (@var{files}, @var{dir})