Mercurial > octave
view liboctave/operators/mx-inlines.cc @ 30239:120e878f343f
mkdir.m: issue a warning if DIR already exists (bug #61338)
* mkdir.m: When nargout==0, check status message and report a warning if
"directory exists" message found.
author | Rik <rik@octave.org> |
---|---|
date | Wed, 13 Oct 2021 08:05:14 -0700 |
parents | aef11bb4e6d1 |
children | 1f0a2689cab2 |
line wrap: on
line source
//////////////////////////////////////////////////////////////////////// // // Copyright (C) 1993-2021 The Octave Project Developers // // See the file COPYRIGHT.md in the top-level directory of this // distribution or <https://octave.org/copyright/>. // // This file is part of Octave. // // Octave is free software: you can redistribute it and/or modify it // under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Octave is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Octave; see the file COPYING. If not, see // <https://www.gnu.org/licenses/>. // //////////////////////////////////////////////////////////////////////// #if ! defined (octave_mx_inlines_h) #define octave_mx_inlines_h 1 // This file should *not* include config.h. It is only included in other C++ // source files that should have included config.h before including this file. #include <cstddef> #include <cmath> #include <algorithm> #include "Array-util.h" #include "Array.h" #include "bsxfun.h" #include "oct-cmplx.h" #include "oct-inttypes.h" #include "oct-locbuf.h" // Provides some commonly repeated, basic loop templates. template <typename R, typename S> inline void mx_inline_fill (std::size_t n, R *r, S s) { for (std::size_t i = 0; i < n; i++) r[i] = s; } template <typename R, typename X> inline void mx_inline_uminus (std::size_t n, R *r, const X *x) { for (std::size_t i = 0; i < n; i++) r[i] = -x[i]; } template <typename R> inline void mx_inline_uminus2 (std::size_t n, R *r) { for (std::size_t i = 0; i < n; i++) r[i] = -r[i]; } template <typename X> inline void mx_inline_iszero (std::size_t n, bool *r, const X *x) { const X zero = X (); for (std::size_t i = 0; i < n; i++) r[i] = x[i] == zero; } template <typename X> inline void mx_inline_notzero (std::size_t n, bool *r, const X *x) { const X zero = X (); for (std::size_t i = 0; i < n; i++) r[i] = x[i] != zero; } #define DEFMXBINOP(F, OP) \ template <typename R, typename X, typename Y> \ inline void F (std::size_t n, R *r, const X *x, const Y *y) \ { \ for (std::size_t i = 0; i < n; i++) \ r[i] = x[i] OP y[i]; \ } \ template <typename R, typename X, typename Y> \ inline void F (std::size_t n, R *r, const X *x, Y y) \ { \ for (std::size_t i = 0; i < n; i++) \ r[i] = x[i] OP y; \ } \ template <typename R, typename X, typename Y> \ inline void F (std::size_t n, R *r, X x, const Y *y) \ { \ for (std::size_t i = 0; i < n; i++) \ r[i] = x OP y[i]; \ } DEFMXBINOP (mx_inline_add, +) DEFMXBINOP (mx_inline_sub, -) DEFMXBINOP (mx_inline_mul, *) DEFMXBINOP (mx_inline_div, /) #define DEFMXBINOPEQ(F, OP) \ template <typename R, typename X> \ inline void F (std::size_t n, R *r, const X *x) \ { \ for (std::size_t i = 0; i < n; i++) \ r[i] OP x[i]; \ } \ template <typename R, typename X> \ inline void F (std::size_t n, R *r, X x) \ { \ for (std::size_t i = 0; i < n; i++) \ r[i] OP x; \ } DEFMXBINOPEQ (mx_inline_add2, +=) DEFMXBINOPEQ (mx_inline_sub2, -=) DEFMXBINOPEQ (mx_inline_mul2, *=) DEFMXBINOPEQ (mx_inline_div2, /=) #define DEFMXCMPOP(F, OP) \ template <typename X, typename Y> \ inline void F (std::size_t n, bool *r, const X *x, const Y *y) \ { \ for (std::size_t i = 0; i < n; i++) \ r[i] = x[i] OP y[i]; \ } \ template <typename X, typename Y> \ inline void F (std::size_t n, bool *r, const X *x, Y y) \ { \ for (std::size_t i = 0; i < n; i++) \ r[i] = x[i] OP y; \ } \ template <typename X, typename Y> \ inline void F (std::size_t n, bool *r, X x, const Y *y) \ { \ for (std::size_t i = 0; i < n; i++) \ r[i] = x OP y[i]; \ } DEFMXCMPOP (mx_inline_lt, <) DEFMXCMPOP (mx_inline_le, <=) DEFMXCMPOP (mx_inline_gt, >) DEFMXCMPOP (mx_inline_ge, >=) DEFMXCMPOP (mx_inline_eq, ==) DEFMXCMPOP (mx_inline_ne, !=) // Convert to logical value, for logical op purposes. template <typename T> inline bool logical_value (T x) { return x; } template <typename T> inline bool logical_value (const std::complex<T>& x) { return x.real () != 0 || x.imag () != 0; } template <typename T> inline bool logical_value (const octave_int<T>& x) { return x.value (); } template <typename X> void mx_inline_not (std::size_t n, bool *r, const X *x) { for (std::size_t i = 0; i < n; i++) r[i] = ! logical_value (x[i]); } inline void mx_inline_not2 (std::size_t n, bool *r) { for (std::size_t i = 0; i < n; i++) r[i] = ! r[i]; } #define DEFMXBOOLOP(F, NOT1, OP, NOT2) \ template <typename X, typename Y> \ inline void F (std::size_t n, bool *r, const X *x, const Y *y) \ { \ for (std::size_t i = 0; i < n; i++) \ r[i] = ((NOT1 logical_value (x[i])) \ OP (NOT2 logical_value (y[i]))); \ } \ template <typename X, typename Y> \ inline void F (std::size_t n, bool *r, const X *x, Y y) \ { \ const bool yy = (NOT2 logical_value (y)); \ for (std::size_t i = 0; i < n; i++) \ r[i] = (NOT1 logical_value (x[i])) OP yy; \ } \ template <typename X, typename Y> \ inline void F (std::size_t n, bool *r, X x, const Y *y) \ { \ const bool xx = (NOT1 logical_value (x)); \ for (std::size_t i = 0; i < n; i++) \ r[i] = xx OP (NOT2 logical_value (y[i])); \ } DEFMXBOOLOP (mx_inline_and, , &, ) DEFMXBOOLOP (mx_inline_or, , |, ) DEFMXBOOLOP (mx_inline_not_and, !, &, ) DEFMXBOOLOP (mx_inline_not_or, !, |, ) DEFMXBOOLOP (mx_inline_and_not, , &, !) DEFMXBOOLOP (mx_inline_or_not, , |, !) template <typename X> inline void mx_inline_and2 (std::size_t n, bool *r, const X *x) { for (std::size_t i = 0; i < n; i++) r[i] &= logical_value (x[i]); } template <typename X> inline void mx_inline_and2 (std::size_t n, bool *r, X x) { for (std::size_t i = 0; i < n; i++) r[i] &= x; } template <typename X> inline void mx_inline_or2 (std::size_t n, bool *r, const X *x) { for (std::size_t i = 0; i < n; i++) r[i] |= logical_value (x[i]); } template <typename X> inline void mx_inline_or2 (std::size_t n, bool *r, X x) { for (std::size_t i = 0; i < n; i++) r[i] |= x; } template <typename T> inline bool mx_inline_any_nan (std::size_t n, const T *x) { for (std::size_t i = 0; i < n; i++) { if (octave::math::isnan (x[i])) return true; } return false; } template <typename T> inline bool mx_inline_all_finite (std::size_t n, const T *x) { for (std::size_t i = 0; i < n; i++) { if (! octave::math::isfinite (x[i])) return false; } return true; } template <typename T> inline bool mx_inline_any_negative (std::size_t n, const T *x) { for (std::size_t i = 0; i < n; i++) { if (x[i] < 0) return true; } return false; } template <typename T> inline bool mx_inline_any_positive (std::size_t n, const T *x) { for (std::size_t i = 0; i < n; i++) { if (x[i] > 0) return true; } return false; } template <typename T> inline bool mx_inline_all_real (std::size_t n, const std::complex<T>* x) { for (std::size_t i = 0; i < n; i++) { if (x[i].imag () != 0) return false; } return true; } template <typename T> inline void mx_inline_real (std::size_t n, T *r, const std::complex<T>* x) { for (std::size_t i = 0; i < n; i++) r[i] = x[i].real (); } template <typename T> inline void mx_inline_imag (std::size_t n, T *r, const std::complex<T>* x) { for (std::size_t i = 0; i < n; i++) r[i] = x[i].imag (); } template <typename T> inline void mx_inline_xmin (std::size_t n, T *r, const T *x, const T *y) { for (std::size_t i = 0; i < n; i++) r[i] = octave::math::min (x[i], y[i]); } template <typename T> inline void mx_inline_xmin (std::size_t n, T *r, const T *x, T y) { for (std::size_t i = 0; i < n; i++) r[i] = octave::math::min (x[i], y); } template <typename T> inline void mx_inline_xmin (std::size_t n, T *r, T x, const T *y) { for (std::size_t i = 0; i < n; i++) r[i] = octave::math::min (x, y[i]); } template <typename T> inline void mx_inline_xmax (std::size_t n, T *r, const T *x, const T *y) { for (std::size_t i = 0; i < n; i++) r[i] = octave::math::max (x[i], y[i]); } template <typename T> inline void mx_inline_xmax (std::size_t n, T *r, const T *x, T y) { for (std::size_t i = 0; i < n; i++) r[i] = octave::math::max (x[i], y); } template <typename T> inline void mx_inline_xmax (std::size_t n, T *r, T x, const T *y) { for (std::size_t i = 0; i < n; i++) r[i] = octave::math::max (x, y[i]); } // Specialize array-scalar max/min #define DEFMINMAXSPEC(T, F, OP) \ template <> \ inline void F<T> (std::size_t n, T *r, const T *x, T y) \ { \ if (octave::math::isnan (y)) \ std::memcpy (r, x, n * sizeof (T)); \ else \ for (std::size_t i = 0; i < n; i++) \ r[i] = (x[i] OP y ? x[i] : y); \ } \ template <> \ inline void F<T> (std::size_t n, T *r, T x, const T *y) \ { \ if (octave::math::isnan (x)) \ std::memcpy (r, y, n * sizeof (T)); \ else \ for (std::size_t i = 0; i < n; i++) \ r[i] = (y[i] OP x ? y[i] : x); \ } DEFMINMAXSPEC (double, mx_inline_xmin, <=) DEFMINMAXSPEC (double, mx_inline_xmax, >=) DEFMINMAXSPEC (float, mx_inline_xmin, <=) DEFMINMAXSPEC (float, mx_inline_xmax, >=) // FIXME: Is this comment correct anymore? It seems like std::pow is chosen. // Let the compiler decide which pow to use, whichever best matches the // arguments provided. template <typename R, typename X, typename Y> inline void mx_inline_pow (std::size_t n, R *r, const X *x, const Y *y) { using std::pow; for (std::size_t i = 0; i < n; i++) r[i] = pow (x[i], y[i]); } template <typename R, typename X, typename Y> inline void mx_inline_pow (std::size_t n, R *r, const X *x, Y y) { using std::pow; for (std::size_t i = 0; i < n; i++) r[i] = pow (x[i], y); } template <typename R, typename X, typename Y> inline void mx_inline_pow (std::size_t n, R *r, X x, const Y *y) { using std::pow; for (std::size_t i = 0; i < n; i++) r[i] = pow (x, y[i]); } // Arbitrary function appliers. // The function is a template parameter to enable inlining. template <typename R, typename X, R fun (X x)> inline void mx_inline_map (std::size_t n, R *r, const X *x) { for (std::size_t i = 0; i < n; i++) r[i] = fun (x[i]); } template <typename R, typename X, R fun (const X& x)> inline void mx_inline_map (std::size_t n, R *r, const X *x) { for (std::size_t i = 0; i < n; i++) r[i] = fun (x[i]); } // Appliers. Since these call the operation just once, we pass it as // a pointer, to allow the compiler reduce number of instances. template <typename R, typename X> inline Array<R> do_mx_unary_op (const Array<X>& x, void (*op) (std::size_t, R *, const X *)) { Array<R> r (x.dims ()); op (r.numel (), r.fortran_vec (), x.data ()); return r; } // Shortcuts for applying mx_inline_map. template <typename R, typename X, R fun (X)> inline Array<R> do_mx_unary_map (const Array<X>& x) { return do_mx_unary_op<R, X> (x, mx_inline_map<R, X, fun>); } template <typename R, typename X, R fun (const X&)> inline Array<R> do_mx_unary_map (const Array<X>& x) { return do_mx_unary_op<R, X> (x, mx_inline_map<R, X, fun>); } template <typename R> inline Array<R>& do_mx_inplace_op (Array<R>& r, void (*op) (std::size_t, R *)) { op (r.numel (), r.fortran_vec ()); return r; } template <typename R, typename X, typename Y> inline Array<R> do_mm_binary_op (const Array<X>& x, const Array<Y>& y, void (*op) (std::size_t, R *, const X *, const Y *), void (*op1) (std::size_t, R *, X, const Y *), void (*op2) (std::size_t, R *, const X *, Y), const char *opname) { dim_vector dx = x.dims (); dim_vector dy = y.dims (); if (dx == dy) { Array<R> r (dx); op (r.numel (), r.fortran_vec (), x.data (), y.data ()); return r; } else if (is_valid_bsxfun (opname, dx, dy)) { return do_bsxfun_op (x, y, op, op1, op2); } else octave::err_nonconformant (opname, dx, dy); } template <typename R, typename X, typename Y> inline Array<R> do_ms_binary_op (const Array<X>& x, const Y& y, void (*op) (std::size_t, R *, const X *, Y)) { Array<R> r (x.dims ()); op (r.numel (), r.fortran_vec (), x.data (), y); return r; } template <typename R, typename X, typename Y> inline Array<R> do_sm_binary_op (const X& x, const Array<Y>& y, void (*op) (std::size_t, R *, X, const Y *)) { Array<R> r (y.dims ()); op (r.numel (), r.fortran_vec (), x, y.data ()); return r; } template <typename R, typename X> inline Array<R>& do_mm_inplace_op (Array<R>& r, const Array<X>& x, void (*op) (std::size_t, R *, const X *), void (*op1) (std::size_t, R *, X), const char *opname) { dim_vector dr = r.dims (); dim_vector dx = x.dims (); if (dr == dx) op (r.numel (), r.fortran_vec (), x.data ()); else if (is_valid_inplace_bsxfun (opname, dr, dx)) do_inplace_bsxfun_op (r, x, op, op1); else octave::err_nonconformant (opname, dr, dx); return r; } template <typename R, typename X> inline Array<R>& do_ms_inplace_op (Array<R>& r, const X& x, void (*op) (std::size_t, R *, X)) { op (r.numel (), r.fortran_vec (), x); return r; } template <typename T1, typename T2> inline bool mx_inline_equal (std::size_t n, const T1 *x, const T2 *y) { for (std::size_t i = 0; i < n; i++) if (x[i] != y[i]) return false; return true; } template <typename T> inline bool do_mx_check (const Array<T>& a, bool (*op) (std::size_t, const T *)) { return op (a.numel (), a.data ()); } // NOTE: we don't use std::norm because it typically does some heavyweight // magic to avoid underflows, which we don't need here. template <typename T> inline T cabsq (const std::complex<T>& c) { return c.real () * c.real () + c.imag () * c.imag (); } // default. works for integers and bool. template <typename T> inline bool xis_true (T x) { return x; } template <typename T> inline bool xis_false (T x) { return ! x; } // for octave_ints template <typename T> inline bool xis_true (const octave_int<T>& x) { return x.value (); } template <typename T> inline bool xis_false (const octave_int<T>& x) { return ! x.value (); } // for reals, we want to ignore NaNs. inline bool xis_true (double x) { return ! octave::math::isnan (x) && x != 0.0; } inline bool xis_false (double x) { return x == 0.0; } inline bool xis_true (float x) { return ! octave::math::isnan (x) && x != 0.0f; } inline bool xis_false (float x) { return x == 0.0f; } // Ditto for complex. inline bool xis_true (const Complex& x) { return ! octave::math::isnan (x) && x != 0.0; } inline bool xis_false (const Complex& x) { return x == 0.0; } inline bool xis_true (const FloatComplex& x) { return ! octave::math::isnan (x) && x != 0.0f; } inline bool xis_false (const FloatComplex& x) { return x == 0.0f; } #define OP_RED_SUM(ac, el) ac += el #define OP_RED_PROD(ac, el) ac *= el #define OP_RED_SUMSQ(ac, el) ac += ((el)*(el)) #define OP_RED_SUMSQC(ac, el) ac += cabsq (el) inline void op_dble_prod (double& ac, float el) { ac *= el; } // FIXME: guaranteed? inline void op_dble_prod (Complex& ac, const FloatComplex& el) { ac *= el; } template <typename T> inline void op_dble_prod (double& ac, const octave_int<T>& el) { ac *= el.double_value (); } inline void op_dble_sum (double& ac, float el) { ac += el; } // FIXME: guaranteed? inline void op_dble_sum (Complex& ac, const FloatComplex& el) { ac += el; } template <typename T> inline void op_dble_sum (double& ac, const octave_int<T>& el) { ac += el.double_value (); } // The following two implement a simple short-circuiting. #define OP_RED_ANYC(ac, el) \ if (xis_true (el)) \ { \ ac = true; \ break; \ } \ else \ continue #define OP_RED_ALLC(ac, el) \ if (xis_false (el)) \ { \ ac = false; \ break; \ } \ else \ continue #define OP_RED_FCN(F, TSRC, TRES, OP, ZERO) \ template <typename T> \ inline TRES \ F (const TSRC *v, octave_idx_type n) \ { \ TRES ac = ZERO; \ for (octave_idx_type i = 0; i < n; i++) \ OP(ac, v[i]); \ return ac; \ } #define PROMOTE_DOUBLE(T) \ typename subst_template_param<std::complex, T, double>::type OP_RED_FCN (mx_inline_sum, T, T, OP_RED_SUM, 0) OP_RED_FCN (mx_inline_dsum, T, PROMOTE_DOUBLE(T), op_dble_sum, 0.0) OP_RED_FCN (mx_inline_count, bool, T, OP_RED_SUM, 0) OP_RED_FCN (mx_inline_prod, T, T, OP_RED_PROD, 1) OP_RED_FCN (mx_inline_dprod, T, PROMOTE_DOUBLE(T), op_dble_prod, 1) OP_RED_FCN (mx_inline_sumsq, T, T, OP_RED_SUMSQ, 0) OP_RED_FCN (mx_inline_sumsq, std::complex<T>, T, OP_RED_SUMSQC, 0) OP_RED_FCN (mx_inline_any, T, bool, OP_RED_ANYC, false) OP_RED_FCN (mx_inline_all, T, bool, OP_RED_ALLC, true) #define OP_RED_FCN2(F, TSRC, TRES, OP, ZERO) \ template <typename T> \ inline void \ F (const TSRC *v, TRES *r, octave_idx_type m, octave_idx_type n) \ { \ for (octave_idx_type i = 0; i < m; i++) \ r[i] = ZERO; \ for (octave_idx_type j = 0; j < n; j++) \ { \ for (octave_idx_type i = 0; i < m; i++) \ OP(r[i], v[i]); \ v += m; \ } \ } OP_RED_FCN2 (mx_inline_sum, T, T, OP_RED_SUM, 0) OP_RED_FCN2 (mx_inline_dsum, T, PROMOTE_DOUBLE(T), op_dble_sum, 0.0) OP_RED_FCN2 (mx_inline_count, bool, T, OP_RED_SUM, 0) OP_RED_FCN2 (mx_inline_prod, T, T, OP_RED_PROD, 1) OP_RED_FCN2 (mx_inline_dprod, T, PROMOTE_DOUBLE(T), op_dble_prod, 0.0) OP_RED_FCN2 (mx_inline_sumsq, T, T, OP_RED_SUMSQ, 0) OP_RED_FCN2 (mx_inline_sumsq, std::complex<T>, T, OP_RED_SUMSQC, 0) #define OP_RED_ANYR(ac, el) ac |= xis_true (el) #define OP_RED_ALLR(ac, el) ac &= xis_true (el) OP_RED_FCN2 (mx_inline_any_r, T, bool, OP_RED_ANYR, false) OP_RED_FCN2 (mx_inline_all_r, T, bool, OP_RED_ALLR, true) // Using the general code for any/all would sacrifice short-circuiting. // OTOH, going by rows would sacrifice cache-coherence. The following // algorithm will achieve both, at the cost of a temporary octave_idx_type // array. #define OP_ROW_SHORT_CIRCUIT(F, PRED, ZERO) \ template <typename T> \ inline void \ F (const T *v, bool *r, octave_idx_type m, octave_idx_type n) \ { \ if (n <= 8) \ return F ## _r (v, r, m, n); \ \ /* FIXME: it may be sub-optimal to allocate the buffer here. */ \ OCTAVE_LOCAL_BUFFER (octave_idx_type, iact, m); \ for (octave_idx_type i = 0; i < m; i++) iact[i] = i; \ octave_idx_type nact = m; \ for (octave_idx_type j = 0; j < n; j++) \ { \ octave_idx_type k = 0; \ for (octave_idx_type i = 0; i < nact; i++) \ { \ octave_idx_type ia = iact[i]; \ if (! PRED (v[ia])) \ iact[k++] = ia; \ } \ nact = k; \ v += m; \ } \ for (octave_idx_type i = 0; i < m; i++) r[i] = ! ZERO; \ for (octave_idx_type i = 0; i < nact; i++) r[iact[i]] = ZERO; \ } OP_ROW_SHORT_CIRCUIT (mx_inline_any, xis_true, false) OP_ROW_SHORT_CIRCUIT (mx_inline_all, xis_false, true) #define OP_RED_FCNN(F, TSRC, TRES) \ template <typename T> \ inline void \ F (const TSRC *v, TRES *r, octave_idx_type l, \ octave_idx_type n, octave_idx_type u) \ { \ if (l == 1) \ { \ for (octave_idx_type i = 0; i < u; i++) \ { \ r[i] = F<T> (v, n); \ v += n; \ } \ } \ else \ { \ for (octave_idx_type i = 0; i < u; i++) \ { \ F (v, r, l, n); \ v += l*n; \ r += l; \ } \ } \ } OP_RED_FCNN (mx_inline_sum, T, T) OP_RED_FCNN (mx_inline_dsum, T, PROMOTE_DOUBLE(T)) OP_RED_FCNN (mx_inline_count, bool, T) OP_RED_FCNN (mx_inline_prod, T, T) OP_RED_FCNN (mx_inline_dprod, T, PROMOTE_DOUBLE(T)) OP_RED_FCNN (mx_inline_sumsq, T, T) OP_RED_FCNN (mx_inline_sumsq, std::complex<T>, T) OP_RED_FCNN (mx_inline_any, T, bool) OP_RED_FCNN (mx_inline_all, T, bool) #define OP_CUM_FCN(F, TSRC, TRES, OP) \ template <typename T> \ inline void \ F (const TSRC *v, TRES *r, octave_idx_type n) \ { \ if (n) \ { \ TRES t = r[0] = v[0]; \ for (octave_idx_type i = 1; i < n; i++) \ r[i] = t = t OP v[i]; \ } \ } OP_CUM_FCN (mx_inline_cumsum, T, T, +) OP_CUM_FCN (mx_inline_cumprod, T, T, *) OP_CUM_FCN (mx_inline_cumcount, bool, T, +) #define OP_CUM_FCN2(F, TSRC, TRES, OP) \ template <typename T> \ inline void \ F (const TSRC *v, TRES *r, octave_idx_type m, octave_idx_type n) \ { \ if (n) \ { \ for (octave_idx_type i = 0; i < m; i++) \ r[i] = v[i]; \ const T *r0 = r; \ for (octave_idx_type j = 1; j < n; j++) \ { \ r += m; v += m; \ for (octave_idx_type i = 0; i < m; i++) \ r[i] = r0[i] OP v[i]; \ r0 += m; \ } \ } \ } OP_CUM_FCN2 (mx_inline_cumsum, T, T, +) OP_CUM_FCN2 (mx_inline_cumprod, T, T, *) OP_CUM_FCN2 (mx_inline_cumcount, bool, T, +) #define OP_CUM_FCNN(F, TSRC, TRES) \ template <typename T> \ inline void \ F (const TSRC *v, TRES *r, octave_idx_type l, \ octave_idx_type n, octave_idx_type u) \ { \ if (l == 1) \ { \ for (octave_idx_type i = 0; i < u; i++) \ { \ F (v, r, n); \ v += n; \ r += n; \ } \ } \ else \ { \ for (octave_idx_type i = 0; i < u; i++) \ { \ F (v, r, l, n); \ v += l*n; \ r += l*n; \ } \ } \ } OP_CUM_FCNN (mx_inline_cumsum, T, T) OP_CUM_FCNN (mx_inline_cumprod, T, T) OP_CUM_FCNN (mx_inline_cumcount, bool, T) #define OP_MINMAX_FCN(F, OP) \ template <typename T> \ void F (const T *v, T *r, octave_idx_type n) \ { \ if (! n) \ return; \ T tmp = v[0]; \ octave_idx_type i = 1; \ if (octave::math::isnan (tmp)) \ { \ for (; i < n && octave::math::isnan (v[i]); i++) ; \ if (i < n) \ tmp = v[i]; \ } \ for (; i < n; i++) \ if (v[i] OP tmp) \ tmp = v[i]; \ *r = tmp; \ } \ template <typename T> \ void F (const T *v, T *r, octave_idx_type *ri, octave_idx_type n) \ { \ if (! n) \ return; \ T tmp = v[0]; \ octave_idx_type tmpi = 0; \ octave_idx_type i = 1; \ if (octave::math::isnan (tmp)) \ { \ for (; i < n && octave::math::isnan (v[i]); i++) ; \ if (i < n) \ { \ tmp = v[i]; \ tmpi = i; \ } \ } \ for (; i < n; i++) \ if (v[i] OP tmp) \ { \ tmp = v[i]; \ tmpi = i; \ } \ *r = tmp; \ *ri = tmpi; \ } OP_MINMAX_FCN (mx_inline_min, <) OP_MINMAX_FCN (mx_inline_max, >) // Row reductions will be slightly complicated. We will proceed with checks // for NaNs until we detect that no row will yield a NaN, in which case we // proceed to a faster code. #define OP_MINMAX_FCN2(F, OP) \ template <typename T> \ inline void \ F (const T *v, T *r, octave_idx_type m, octave_idx_type n) \ { \ if (! n) \ return; \ bool nan = false; \ octave_idx_type j = 0; \ for (octave_idx_type i = 0; i < m; i++) \ { \ r[i] = v[i]; \ if (octave::math::isnan (v[i])) \ nan = true; \ } \ j++; \ v += m; \ while (nan && j < n) \ { \ nan = false; \ for (octave_idx_type i = 0; i < m; i++) \ { \ if (octave::math::isnan (v[i])) \ nan = true; \ else if (octave::math::isnan (r[i]) || v[i] OP r[i]) \ r[i] = v[i]; \ } \ j++; \ v += m; \ } \ while (j < n) \ { \ for (octave_idx_type i = 0; i < m; i++) \ if (v[i] OP r[i]) \ r[i] = v[i]; \ j++; \ v += m; \ } \ } \ template <typename T> \ inline void \ F (const T *v, T *r, octave_idx_type *ri, \ octave_idx_type m, octave_idx_type n) \ { \ if (! n) \ return; \ bool nan = false; \ octave_idx_type j = 0; \ for (octave_idx_type i = 0; i < m; i++) \ { \ r[i] = v[i]; \ ri[i] = j; \ if (octave::math::isnan (v[i])) \ nan = true; \ } \ j++; \ v += m; \ while (nan && j < n) \ { \ nan = false; \ for (octave_idx_type i = 0; i < m; i++) \ { \ if (octave::math::isnan (v[i])) \ nan = true; \ else if (octave::math::isnan (r[i]) || v[i] OP r[i]) \ { \ r[i] = v[i]; \ ri[i] = j; \ } \ } \ j++; \ v += m; \ } \ while (j < n) \ { \ for (octave_idx_type i = 0; i < m; i++) \ if (v[i] OP r[i]) \ { \ r[i] = v[i]; \ ri[i] = j; \ } \ j++; \ v += m; \ } \ } OP_MINMAX_FCN2 (mx_inline_min, <) OP_MINMAX_FCN2 (mx_inline_max, >) #define OP_MINMAX_FCNN(F) \ template <typename T> \ inline void \ F (const T *v, T *r, octave_idx_type l, \ octave_idx_type n, octave_idx_type u) \ { \ if (! n) \ return; \ if (l == 1) \ { \ for (octave_idx_type i = 0; i < u; i++) \ { \ F (v, r, n); \ v += n; \ r++; \ } \ } \ else \ { \ for (octave_idx_type i = 0; i < u; i++) \ { \ F (v, r, l, n); \ v += l*n; \ r += l; \ } \ } \ } \ template <typename T> \ inline void \ F (const T *v, T *r, octave_idx_type *ri, \ octave_idx_type l, octave_idx_type n, octave_idx_type u) \ { \ if (! n) return; \ if (l == 1) \ { \ for (octave_idx_type i = 0; i < u; i++) \ { \ F (v, r, ri, n); \ v += n; \ r++; \ ri++; \ } \ } \ else \ { \ for (octave_idx_type i = 0; i < u; i++) \ { \ F (v, r, ri, l, n); \ v += l*n; \ r += l; \ ri += l; \ } \ } \ } OP_MINMAX_FCNN (mx_inline_min) OP_MINMAX_FCNN (mx_inline_max) #define OP_CUMMINMAX_FCN(F, OP) \ template <typename T> \ void F (const T *v, T *r, octave_idx_type n) \ { \ if (! n) \ return; \ T tmp = v[0]; \ octave_idx_type i = 1; \ octave_idx_type j = 0; \ if (octave::math::isnan (tmp)) \ { \ for (; i < n && octave::math::isnan (v[i]); i++) ; \ for (; j < i; j++) \ r[j] = tmp; \ if (i < n) \ tmp = v[i]; \ } \ for (; i < n; i++) \ if (v[i] OP tmp) \ { \ for (; j < i; j++) \ r[j] = tmp; \ tmp = v[i]; \ } \ for (; j < i; j++) \ r[j] = tmp; \ } \ template <typename T> \ void F (const T *v, T *r, octave_idx_type *ri, octave_idx_type n) \ { \ if (! n) \ return; \ T tmp = v[0]; \ octave_idx_type tmpi = 0; \ octave_idx_type i = 1; \ octave_idx_type j = 0; \ if (octave::math::isnan (tmp)) \ { \ for (; i < n && octave::math::isnan (v[i]); i++) ; \ for (; j < i; j++) \ { \ r[j] = tmp; \ ri[j] = tmpi; \ } \ if (i < n) \ { \ tmp = v[i]; \ tmpi = i; \ } \ } \ for (; i < n; i++) \ if (v[i] OP tmp) \ { \ for (; j < i; j++) \ { \ r[j] = tmp; \ ri[j] = tmpi; \ } \ tmp = v[i]; \ tmpi = i; \ } \ for (; j < i; j++) \ { \ r[j] = tmp; \ ri[j] = tmpi; \ } \ } OP_CUMMINMAX_FCN (mx_inline_cummin, <) OP_CUMMINMAX_FCN (mx_inline_cummax, >) // Row reductions will be slightly complicated. We will proceed with checks // for NaNs until we detect that no row will yield a NaN, in which case we // proceed to a faster code. #define OP_CUMMINMAX_FCN2(F, OP) \ template <typename T> \ inline void \ F (const T *v, T *r, octave_idx_type m, octave_idx_type n) \ { \ if (! n) \ return; \ bool nan = false; \ const T *r0; \ octave_idx_type j = 0; \ for (octave_idx_type i = 0; i < m; i++) \ { \ r[i] = v[i]; \ if (octave::math::isnan (v[i])) \ nan = true; \ } \ j++; \ v += m; \ r0 = r; \ r += m; \ while (nan && j < n) \ { \ nan = false; \ for (octave_idx_type i = 0; i < m; i++) \ { \ if (octave::math::isnan (v[i])) \ { \ r[i] = r0[i]; \ nan = true; \ } \ else if (octave::math::isnan (r0[i]) || v[i] OP r0[i]) \ r[i] = v[i]; \ else \ r[i] = r0[i]; \ } \ j++; \ v += m; \ r0 = r; \ r += m; \ } \ while (j < n) \ { \ for (octave_idx_type i = 0; i < m; i++) \ if (v[i] OP r0[i]) \ r[i] = v[i]; \ else \ r[i] = r0[i]; \ j++; \ v += m; \ r0 = r; \ r += m; \ } \ } \ template <typename T> \ inline void \ F (const T *v, T *r, octave_idx_type *ri, \ octave_idx_type m, octave_idx_type n) \ { \ if (! n) \ return; \ bool nan = false; \ const T *r0; \ const octave_idx_type *r0i; \ octave_idx_type j = 0; \ for (octave_idx_type i = 0; i < m; i++) \ { \ r[i] = v[i]; ri[i] = 0; \ if (octave::math::isnan (v[i])) \ nan = true; \ } \ j++; \ v += m; \ r0 = r; \ r += m; \ r0i = ri; \ ri += m; \ while (nan && j < n) \ { \ nan = false; \ for (octave_idx_type i = 0; i < m; i++) \ { \ if (octave::math::isnan (v[i])) \ { \ r[i] = r0[i]; \ ri[i] = r0i[i]; \ nan = true; \ } \ else if (octave::math::isnan (r0[i]) || v[i] OP r0[i]) \ { \ r[i] = v[i]; \ ri[i] = j; \ } \ else \ { \ r[i] = r0[i]; \ ri[i] = r0i[i]; \ } \ } \ j++; \ v += m; \ r0 = r; \ r += m; \ r0i = ri; \ ri += m; \ } \ while (j < n) \ { \ for (octave_idx_type i = 0; i < m; i++) \ if (v[i] OP r0[i]) \ { \ r[i] = v[i]; \ ri[i] = j; \ } \ else \ { \ r[i] = r0[i]; \ ri[i] = r0i[i]; \ } \ j++; \ v += m; \ r0 = r; \ r += m; \ r0i = ri; \ ri += m; \ } \ } OP_CUMMINMAX_FCN2 (mx_inline_cummin, <) OP_CUMMINMAX_FCN2 (mx_inline_cummax, >) #define OP_CUMMINMAX_FCNN(F) \ template <typename T> \ inline void \ F (const T *v, T *r, octave_idx_type l, \ octave_idx_type n, octave_idx_type u) \ { \ if (! n) \ return; \ if (l == 1) \ { \ for (octave_idx_type i = 0; i < u; i++) \ { \ F (v, r, n); \ v += n; \ r += n; \ } \ } \ else \ { \ for (octave_idx_type i = 0; i < u; i++) \ { \ F (v, r, l, n); \ v += l*n; \ r += l*n; \ } \ } \ } \ template <typename T> \ inline void \ F (const T *v, T *r, octave_idx_type *ri, \ octave_idx_type l, octave_idx_type n, octave_idx_type u) \ { \ if (! n) \ return; \ if (l == 1) \ { \ for (octave_idx_type i = 0; i < u; i++) \ { \ F (v, r, ri, n); \ v += n; \ r += n; \ ri += n; \ } \ } \ else \ { \ for (octave_idx_type i = 0; i < u; i++) \ { \ F (v, r, ri, l, n); \ v += l*n; \ r += l*n; \ ri += l*n; \ } \ } \ } OP_CUMMINMAX_FCNN (mx_inline_cummin) OP_CUMMINMAX_FCNN (mx_inline_cummax) template <typename T> void mx_inline_diff (const T *v, T *r, octave_idx_type n, octave_idx_type order) { switch (order) { case 1: for (octave_idx_type i = 0; i < n-1; i++) r[i] = v[i+1] - v[i]; break; case 2: if (n > 1) { T lst = v[1] - v[0]; for (octave_idx_type i = 0; i < n-2; i++) { T dif = v[i+2] - v[i+1]; r[i] = dif - lst; lst = dif; } } break; default: { OCTAVE_LOCAL_BUFFER (T, buf, n-1); for (octave_idx_type i = 0; i < n-1; i++) buf[i] = v[i+1] - v[i]; for (octave_idx_type o = 2; o <= order; o++) { for (octave_idx_type i = 0; i < n-o; i++) buf[i] = buf[i+1] - buf[i]; } for (octave_idx_type i = 0; i < n-order; i++) r[i] = buf[i]; } } } template <typename T> void mx_inline_diff (const T *v, T *r, octave_idx_type m, octave_idx_type n, octave_idx_type order) { switch (order) { case 1: for (octave_idx_type i = 0; i < m*(n-1); i++) r[i] = v[i+m] - v[i]; break; case 2: for (octave_idx_type i = 0; i < n-2; i++) { for (octave_idx_type j = i*m; j < i*m+m; j++) r[j] = (v[j+m+m] - v[j+m]) - (v[j+m] - v[j]); } break; default: { OCTAVE_LOCAL_BUFFER (T, buf, n-1); for (octave_idx_type j = 0; j < m; j++) { for (octave_idx_type i = 0; i < n-1; i++) buf[i] = v[i*m+j+m] - v[i*m+j]; for (octave_idx_type o = 2; o <= order; o++) { for (octave_idx_type i = 0; i < n-o; i++) buf[i] = buf[i+1] - buf[i]; } for (octave_idx_type i = 0; i < n-order; i++) r[i*m+j] = buf[i]; } } } } template <typename T> inline void mx_inline_diff (const T *v, T *r, octave_idx_type l, octave_idx_type n, octave_idx_type u, octave_idx_type order) { if (! n) return; if (l == 1) { for (octave_idx_type i = 0; i < u; i++) { mx_inline_diff (v, r, n, order); v += n; r += n-order; } } else { for (octave_idx_type i = 0; i < u; i++) { mx_inline_diff (v, r, l, n, order); v += l*n; r += l*(n-order); } } } // Assistant function inline void get_extent_triplet (const dim_vector& dims, int& dim, octave_idx_type& l, octave_idx_type& n, octave_idx_type& u) { octave_idx_type ndims = dims.ndims (); if (dim >= ndims) { l = dims.numel (); n = 1; u = 1; } else { if (dim < 0) dim = dims.first_non_singleton (); // calculate extent triplet. l = 1, n = dims(dim), u = 1; for (octave_idx_type i = 0; i < dim; i++) l *= dims(i); for (octave_idx_type i = dim + 1; i < ndims; i++) u *= dims(i); } } // Appliers. // FIXME: is this the best design? C++ gives a lot of options here... // maybe it can be done without an explicit parameter? template <typename R, typename T> inline Array<R> do_mx_red_op (const Array<T>& src, int dim, void (*mx_red_op) (const T *, R *, octave_idx_type, octave_idx_type, octave_idx_type)) { octave_idx_type l, n, u; dim_vector dims = src.dims (); // M*b inconsistency: sum ([]) = 0 etc. if (dims.ndims () == 2 && dims(0) == 0 && dims(1) == 0) dims(1) = 1; get_extent_triplet (dims, dim, l, n, u); // Reduction operation reduces the array size. if (dim < dims.ndims ()) dims(dim) = 1; dims.chop_trailing_singletons (); Array<R> ret (dims); mx_red_op (src.data (), ret.fortran_vec (), l, n, u); return ret; } template <typename R, typename T> inline Array<R> do_mx_cum_op (const Array<T>& src, int dim, void (*mx_cum_op) (const T *, R *, octave_idx_type, octave_idx_type, octave_idx_type)) { octave_idx_type l, n, u; dim_vector dims = src.dims (); get_extent_triplet (dims, dim, l, n, u); // Cumulative operation doesn't reduce the array size. Array<R> ret (dims); mx_cum_op (src.data (), ret.fortran_vec (), l, n, u); return ret; } template <typename R> inline Array<R> do_mx_minmax_op (const Array<R>& src, int dim, void (*mx_minmax_op) (const R *, R *, octave_idx_type, octave_idx_type, octave_idx_type)) { octave_idx_type l, n, u; dim_vector dims = src.dims (); get_extent_triplet (dims, dim, l, n, u); // If the dimension is zero, we don't do anything. if (dim < dims.ndims () && dims(dim) != 0) dims(dim) = 1; dims.chop_trailing_singletons (); Array<R> ret (dims); mx_minmax_op (src.data (), ret.fortran_vec (), l, n, u); return ret; } template <typename R> inline Array<R> do_mx_minmax_op (const Array<R>& src, Array<octave_idx_type>& idx, int dim, void (*mx_minmax_op) (const R *, R *, octave_idx_type *, octave_idx_type, octave_idx_type, octave_idx_type)) { octave_idx_type l, n, u; dim_vector dims = src.dims (); get_extent_triplet (dims, dim, l, n, u); // If the dimension is zero, we don't do anything. if (dim < dims.ndims () && dims(dim) != 0) dims(dim) = 1; dims.chop_trailing_singletons (); Array<R> ret (dims); if (idx.dims () != dims) idx = Array<octave_idx_type> (dims); mx_minmax_op (src.data (), ret.fortran_vec (), idx.fortran_vec (), l, n, u); return ret; } template <typename R> inline Array<R> do_mx_cumminmax_op (const Array<R>& src, int dim, void (*mx_cumminmax_op) (const R *, R *, octave_idx_type, octave_idx_type, octave_idx_type)) { octave_idx_type l, n, u; dim_vector dims = src.dims (); get_extent_triplet (dims, dim, l, n, u); Array<R> ret (dims); mx_cumminmax_op (src.data (), ret.fortran_vec (), l, n, u); return ret; } template <typename R> inline Array<R> do_mx_cumminmax_op (const Array<R>& src, Array<octave_idx_type>& idx, int dim, void (*mx_cumminmax_op) (const R *, R *, octave_idx_type *, octave_idx_type, octave_idx_type, octave_idx_type)) { octave_idx_type l, n, u; dim_vector dims = src.dims (); get_extent_triplet (dims, dim, l, n, u); Array<R> ret (dims); if (idx.dims () != dims) idx = Array<octave_idx_type> (dims); mx_cumminmax_op (src.data (), ret.fortran_vec (), idx.fortran_vec (), l, n, u); return ret; } template <typename R> inline Array<R> do_mx_diff_op (const Array<R>& src, int dim, octave_idx_type order, void (*mx_diff_op) (const R *, R *, octave_idx_type, octave_idx_type, octave_idx_type, octave_idx_type)) { octave_idx_type l, n, u; if (order <= 0) return src; dim_vector dims = src.dims (); get_extent_triplet (dims, dim, l, n, u); if (dim >= dims.ndims ()) dims.resize (dim+1, 1); if (dims(dim) <= order) { dims(dim) = 0; return Array<R> (dims); } else { dims(dim) -= order; } Array<R> ret (dims); mx_diff_op (src.data (), ret.fortran_vec (), l, n, u, order); return ret; } // Fast extra-precise summation. According to // T. Ogita, S. M. Rump, S. Oishi: // Accurate Sum And Dot Product, // SIAM J. Sci. Computing, Vol. 26, 2005 template <typename T> inline void twosum_accum (T& s, T& e, const T& x) { T s1 = s + x; T t = s1 - s; T e1 = (s - (s1 - t)) + (x - t); s = s1; e += e1; } template <typename T> inline T mx_inline_xsum (const T *v, octave_idx_type n) { T s, e; s = e = 0; for (octave_idx_type i = 0; i < n; i++) twosum_accum (s, e, v[i]); return s + e; } template <typename T> inline void mx_inline_xsum (const T *v, T *r, octave_idx_type m, octave_idx_type n) { OCTAVE_LOCAL_BUFFER (T, e, m); for (octave_idx_type i = 0; i < m; i++) e[i] = r[i] = T (); for (octave_idx_type j = 0; j < n; j++) { for (octave_idx_type i = 0; i < m; i++) twosum_accum (r[i], e[i], v[i]); v += m; } for (octave_idx_type i = 0; i < m; i++) r[i] += e[i]; } OP_RED_FCNN (mx_inline_xsum, T, T) #endif