# HG changeset patch # User Jaroslav Hajek # Date 1257933567 -3600 # Node ID ef4c4186cb477cce509827e1ab41f2e14768b89c # Parent cfd0aa788ae19d642f3ff51261237b0286391e69 improve some mx_inline loops diff -r cfd0aa788ae1 -r ef4c4186cb47 liboctave/Array.h --- a/liboctave/Array.h Tue Nov 10 23:07:25 2009 -0500 +++ b/liboctave/Array.h Wed Nov 11 10:59:27 2009 +0100 @@ -676,25 +676,6 @@ return result; } - // This is non-breakable map, suitable for fast functions. Efficiency - // relies on compiler's ability to inline a function pointer. This seems - // to be OK with recent GCC. - template - Array - fastmap (U (*fcn) (typename ref_param::type)) const - { - octave_idx_type len = length (); - - const T *m = data (); - - Array result (dims ()); - U *p = result.fortran_vec (); - - std::transform (m, m + len, p, fcn); - - return result; - } - template friend class Array; private: diff -r cfd0aa788ae1 -r ef4c4186cb47 liboctave/CNDArray.cc --- a/liboctave/CNDArray.cc Tue Nov 10 23:07:25 2009 -0500 +++ b/liboctave/CNDArray.cc Wed Nov 11 10:59:27 2009 +0100 @@ -760,33 +760,31 @@ NDArray ComplexNDArray::abs (void) const { - return NDArray (mx_inline_cabs_dup (data (), length ()), - dims ()); + return do_mx_unary_map (*this); } boolNDArray ComplexNDArray::isnan (void) const { - return Array (fastmap (xisnan)); + return do_mx_unary_map (*this); } boolNDArray ComplexNDArray::isinf (void) const { - return Array (fastmap (xisinf)); + return do_mx_unary_map (*this); } boolNDArray ComplexNDArray::isfinite (void) const { - return Array (fastmap (xfinite)); + return do_mx_unary_map (*this); } ComplexNDArray conj (const ComplexNDArray& a) { - return ComplexNDArray (mx_inline_conj_dup (a.data (), a.length ()), - a.dims ()); + return do_mx_unary_map (a); } ComplexNDArray& diff -r cfd0aa788ae1 -r ef4c4186cb47 liboctave/ChangeLog --- a/liboctave/ChangeLog Tue Nov 10 23:07:25 2009 -0500 +++ b/liboctave/ChangeLog Wed Nov 11 10:59:27 2009 +0100 @@ -1,3 +1,21 @@ +2009-11-11 Jaroslav Hajek + + * mx-inlines.cc (mx_inline_map): New template loop. + (DEFMXMAPPER): New macro. + (DEFMXMAPPER2): Rename from DEFMXMAPPER. + (do_mx_unary_map): New applier. + (mx_inline_real, mx_inline_complex): New loops. + * dNDArray.cc (NDArray::abs, NDArray::isnan, NDArray::isinf, + NDArray::isfinite, real, imag): Use new constructs. + * fNDArray.cc (FloatNDArray::abs, FloatNDArray::isnan, + FloatNDArray::isinf, FloatNDArray::isfinite, real, imag): Ditto. + * CNDArray.cc (ComplexNDArray::abs, ComplexNDArray::isnan, + ComplexNDArray::isinf, ComplexNDArray::isfinite, conj): Use new + constructs. + * fCNDArray.cc (FloatComplexNDArray::abs, FloatComplexNDArray::isnan, + FloatComplexNDArray::isinf, FloatComplexNDArray::isfinite, conj): Use + new constructs. + 2009-11-10 John W. Eaton * mx-ops, sparse-mx-ops, vx-ops b/liboctave/vx-ops: diff -r cfd0aa788ae1 -r ef4c4186cb47 liboctave/dNDArray.cc --- a/liboctave/dNDArray.cc Tue Nov 10 23:07:25 2009 -0500 +++ b/liboctave/dNDArray.cc Wed Nov 11 10:59:27 2009 +0100 @@ -849,15 +849,13 @@ NDArray real (const ComplexNDArray& a) { - return NDArray (mx_inline_real_dup (a.data (), a.length ()), - a.dims ()); + return do_mx_unary_op (a, mx_inline_real); } NDArray imag (const ComplexNDArray& a) { - return NDArray (mx_inline_imag_dup (a.data (), a.length ()), - a.dims ()); + return do_mx_unary_op (a, mx_inline_imag); } NDArray& @@ -877,26 +875,25 @@ NDArray NDArray::abs (void) const { - return NDArray (mx_inline_fabs_dup (data (), length ()), - dims ()); + return do_mx_unary_map (*this); } boolNDArray NDArray::isnan (void) const { - return Array (fastmap (xisnan)); + return do_mx_unary_map (*this); } boolNDArray NDArray::isinf (void) const { - return Array (fastmap (xisinf)); + return do_mx_unary_map (*this); } boolNDArray NDArray::isfinite (void) const { - return Array (fastmap (xfinite)); + return do_mx_unary_map (*this); } Matrix diff -r cfd0aa788ae1 -r ef4c4186cb47 liboctave/fCNDArray.cc --- a/liboctave/fCNDArray.cc Tue Nov 10 23:07:25 2009 -0500 +++ b/liboctave/fCNDArray.cc Wed Nov 11 10:59:27 2009 +0100 @@ -755,33 +755,31 @@ FloatNDArray FloatComplexNDArray::abs (void) const { - return FloatNDArray (mx_inline_cabs_dup (data (), length ()), - dims ()); + return do_mx_unary_map (*this); } boolNDArray FloatComplexNDArray::isnan (void) const { - return Array (fastmap (xisnan)); + return do_mx_unary_map (*this); } boolNDArray FloatComplexNDArray::isinf (void) const { - return Array (fastmap (xisinf)); + return do_mx_unary_map (*this); } boolNDArray FloatComplexNDArray::isfinite (void) const { - return Array (fastmap (xfinite)); + return do_mx_unary_map (*this); } FloatComplexNDArray conj (const FloatComplexNDArray& a) { - return FloatComplexNDArray (mx_inline_conj_dup (a.data (), a.length ()), - a.dims ()); + return do_mx_unary_map (a); } FloatComplexNDArray& diff -r cfd0aa788ae1 -r ef4c4186cb47 liboctave/fNDArray.cc --- a/liboctave/fNDArray.cc Tue Nov 10 23:07:25 2009 -0500 +++ b/liboctave/fNDArray.cc Wed Nov 11 10:59:27 2009 +0100 @@ -807,15 +807,13 @@ FloatNDArray real (const FloatComplexNDArray& a) { - return FloatNDArray (mx_inline_real_dup (a.data (), a.length ()), - a.dims ()); + return do_mx_unary_op (a, mx_inline_real); } FloatNDArray imag (const FloatComplexNDArray& a) { - return FloatNDArray (mx_inline_imag_dup (a.data (), a.length ()), - a.dims ()); + return do_mx_unary_op (a, mx_inline_imag); } FloatNDArray& @@ -835,26 +833,25 @@ FloatNDArray FloatNDArray::abs (void) const { - return FloatNDArray (mx_inline_fabs_dup (data (), length ()), - dims ()); + return do_mx_unary_map (*this); } boolNDArray FloatNDArray::isnan (void) const { - return Array (fastmap (xisnan)); + return do_mx_unary_map (*this); } boolNDArray FloatNDArray::isinf (void) const { - return Array (fastmap (xisinf)); + return do_mx_unary_map (*this); } boolNDArray FloatNDArray::isfinite (void) const { - return Array (fastmap (xfinite)); + return do_mx_unary_map (*this); } FloatMatrix diff -r cfd0aa788ae1 -r ef4c4186cb47 liboctave/mx-inlines.cc --- a/liboctave/mx-inlines.cc Tue Nov 10 23:07:25 2009 -0500 +++ b/liboctave/mx-inlines.cc Wed Nov 11 10:59:27 2009 +0100 @@ -197,8 +197,20 @@ DEFMXANYNAN(Complex) DEFMXANYNAN(FloatComplex) +#define DEFMXMAPPER(F, FUN) \ +template \ +inline void F (size_t n, T *r, const T *x) \ +{ for (size_t i = 0; i < n; i++) r[i] = FUN (x[i]); } + +template +inline void mx_inline_real (size_t n, T *r, const std::complex* x) +{ for (size_t i = 0; i < n; i++) r[i] = x[i].real (); } +template +inline void mx_inline_imag (size_t n, T *r, const std::complex* x) +{ for (size_t i = 0; i < n; i++) r[i] = x[i].imag (); } + // Pairwise minimums/maximums -#define DEFMXMAPPER(F, FUN) \ +#define DEFMXMAPPER2(F, FUN) \ template \ inline void F (size_t n, T *r, const T *x, const T *y) \ { for (size_t i = 0; i < n; i++) r[i] = FUN (x[i], y[i]); } \ @@ -209,35 +221,54 @@ inline void F (size_t n, T *r, T x, const T *y) \ { for (size_t i = 0; i < n; i++) r[i] = FUN (x, y[i]); } -DEFMXMAPPER (mx_inline_xmin, xmin) -DEFMXMAPPER (mx_inline_xmax, xmax) +DEFMXMAPPER2 (mx_inline_xmin, xmin) +DEFMXMAPPER2 (mx_inline_xmax, xmax) -#define DEFMXLOCALMAPPER(F, FUN, T) \ -static void F (size_t n, T *r, const T *x, const T *y) \ -{ for (size_t i = 0; i < n; i++) r[i] = FUN (x[i], y[i]); } \ -static void F (size_t n, T *r, const T *x, T y) \ -{ for (size_t i = 0; i < n; i++) r[i] = FUN (x[i], y); } \ -static void F (size_t n, T *r, T x, const T *y) \ -{ for (size_t i = 0; i < n; i++) r[i] = FUN (x, y[i]); } +// Arbitrary function appliers. The function is a template parameter to enable +// inlining. +template +inline void mx_inline_map (size_t n, R *r, const X *x) +{ for (size_t i = 0; i < n; i++) r[i] = fun (x[i]); } + +template +inline void mx_inline_map (size_t n, R *r, const X *x) +{ for (size_t i = 0; i < n; i++) r[i] = fun (x[i]); } // Appliers. Since these call the operation just once, we pass it as // a pointer, to allow the compiler reduce number of instances. +#define AELEMT(ARRAY) typename ARRAY::element_type template inline RNDA do_mx_unary_op (const XNDA& x, - void (*op) (size_t, typename RNDA::element_type *, - const typename XNDA::element_type *)) + void (*op) (size_t, AELEMT(RNDA) *, + const AELEMT(XNDA) *)) { RNDA r (x.dims ()); op (r.length (), r.fortran_vec (), x.data ()); return r; } +// Shortcuts for applying mx_inline_map. + +template +inline RNDA +do_mx_unary_map (const XNDA& x) +{ + return do_mx_unary_op (x, mx_inline_map); +} + +template +inline RNDA +do_mx_unary_map (const XNDA& x) +{ + return do_mx_unary_op (x, mx_inline_map); +} + template inline RNDA& do_mx_inplace_op (RNDA& r, - void (*op) (size_t, typename RNDA::element_type *)) + void (*op) (size_t, AELEMT(RNDA) *)) { op (r.numel (), r.fortran_vec ()); return r; @@ -247,9 +278,9 @@ template inline RNDA do_mm_binary_op (const XNDA& x, const YNDA& y, - void (*op) (size_t, typename RNDA::element_type *, - const typename XNDA::element_type *, - const typename YNDA::element_type *), + void (*op) (size_t, AELEMT(RNDA) *, + const AELEMT(XNDA) *, + const AELEMT(YNDA) *), const char *opname) { dim_vector dx = x.dims (), dy = y.dims (); @@ -269,8 +300,8 @@ template inline RNDA do_ms_binary_op (const XNDA& x, const YS& y, - void (*op) (size_t, typename RNDA::element_type *, - const typename XNDA::element_type *, YS)) + void (*op) (size_t, AELEMT(RNDA) *, + const AELEMT(XNDA) *, YS)) { RNDA r (x.dims ()); op (r.length (), r.fortran_vec (), x.data (), y); @@ -280,8 +311,8 @@ template inline RNDA do_sm_binary_op (const XS& x, const YNDA& y, - void (*op) (size_t, typename RNDA::element_type *, XS, - const typename YNDA::element_type *)) + void (*op) (size_t, AELEMT(RNDA) *, XS, + const AELEMT(YNDA) *)) { RNDA r (y.dims ()); op (r.length (), r.fortran_vec (), x, y.data ()); @@ -291,8 +322,8 @@ template inline RNDA& do_mm_inplace_op (RNDA& r, const XNDA& x, - void (*op) (size_t, typename RNDA::element_type *, - const typename XNDA::element_type *), + void (*op) (size_t, AELEMT(RNDA) *, + const AELEMT(XNDA) *), const char *opname) { dim_vector dr = r.dims (), dx = x.dims (); @@ -306,7 +337,7 @@ template inline RNDA& do_ms_inplace_op (RNDA& r, const XS& x, - void (*op) (size_t, typename RNDA::element_type *, XS)) + void (*op) (size_t, AELEMT(RNDA) *, XS)) { op (r.length (), r.fortran_vec (), x); return r; @@ -1084,7 +1115,7 @@ template inline ArrayType do_mx_red_op (const Array& src, int dim, - void (*mx_red_op) (const T *, typename ArrayType::element_type *, + void (*mx_red_op) (const T *, AELEMT(ArrayType) *, octave_idx_type, octave_idx_type, octave_idx_type)) { octave_idx_type l, n, u; @@ -1108,7 +1139,7 @@ template inline ArrayType do_mx_cum_op (const Array& src, int dim, - void (*mx_cum_op) (const T *, typename ArrayType::element_type *, + void (*mx_cum_op) (const T *, AELEMT(ArrayType) *, octave_idx_type, octave_idx_type, octave_idx_type)) { octave_idx_type l, n, u; @@ -1125,8 +1156,7 @@ template inline ArrayType do_mx_minmax_op (const ArrayType& src, int dim, - void (*mx_minmax_op) (const typename ArrayType::element_type *, - typename ArrayType::element_type *, + void (*mx_minmax_op) (const AELEMT(ArrayType) *, AELEMT(ArrayType) *, octave_idx_type, octave_idx_type, octave_idx_type)) { octave_idx_type l, n, u; @@ -1146,8 +1176,7 @@ template inline ArrayType do_mx_minmax_op (const ArrayType& src, Array& idx, int dim, - void (*mx_minmax_op) (const typename ArrayType::element_type *, - typename ArrayType::element_type *, + void (*mx_minmax_op) (const AELEMT(ArrayType) *, AELEMT(ArrayType) *, octave_idx_type *, octave_idx_type, octave_idx_type, octave_idx_type)) { @@ -1171,8 +1200,7 @@ template inline ArrayType do_mx_cumminmax_op (const ArrayType& src, int dim, - void (*mx_cumminmax_op) (const typename ArrayType::element_type *, - typename ArrayType::element_type *, + void (*mx_cumminmax_op) (const AELEMT(ArrayType) *, AELEMT(ArrayType) *, octave_idx_type, octave_idx_type, octave_idx_type)) { octave_idx_type l, n, u; @@ -1188,8 +1216,7 @@ template inline ArrayType do_mx_cumminmax_op (const ArrayType& src, Array& idx, int dim, - void (*mx_cumminmax_op) (const typename ArrayType::element_type *, - typename ArrayType::element_type *, + void (*mx_cumminmax_op) (const AELEMT(ArrayType) *, AELEMT(ArrayType) *, octave_idx_type *, octave_idx_type, octave_idx_type, octave_idx_type)) { @@ -1209,8 +1236,7 @@ template inline ArrayType do_mx_diff_op (const ArrayType& src, int dim, octave_idx_type order, - void (*mx_diff_op) (const typename ArrayType::element_type *, - typename ArrayType::element_type *, + void (*mx_diff_op) (const AELEMT(ArrayType) *, AELEMT(ArrayType) *, octave_idx_type, octave_idx_type, octave_idx_type, octave_idx_type)) {