Mercurial > octave-nkf
annotate liboctave/mx-inlines.cc @ 8751:9f7ce4bf7650
optimize min/max functions
author | Jaroslav Hajek <highegg@gmail.com> |
---|---|
date | Mon, 16 Feb 2009 08:52:00 +0100 |
parents | 1bd918cfb6e2 |
children | d0755c9db5ed |
rev | line source |
---|---|
3 | 1 /* |
2 | |
7017 | 3 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, |
4 2003, 2004, 2005, 2006, 2007 John W. Eaton | |
3 | 5 |
6 This file is part of Octave. | |
7 | |
8 Octave is free software; you can redistribute it and/or modify it | |
9 under the terms of the GNU General Public License as published by the | |
7016 | 10 Free Software Foundation; either version 3 of the License, or (at your |
11 option) any later version. | |
3 | 12 |
13 Octave is distributed in the hope that it will be useful, but WITHOUT | |
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 for more details. | |
17 | |
18 You should have received a copy of the GNU General Public License | |
7016 | 19 along with Octave; see the file COPYING. If not, see |
20 <http://www.gnu.org/licenses/>. | |
3 | 21 |
22 */ | |
23 | |
2828 | 24 #if !defined (octave_mx_inlines_h) |
25 #define octave_mx_inlines_h 1 | |
2804 | 26 |
27 #include <cstddef> | |
8650
a1ae2aae903e
abs,real,imag,conj: use code from mx-inlines rather than the generic map
Jaroslav Hajek <highegg@gmail.com>
parents:
8380
diff
changeset
|
28 #include <cmath> |
2804 | 29 |
5525 | 30 #include "quit.h" |
31 | |
1650 | 32 #include "oct-cmplx.h" |
461 | 33 |
8380
dbe67764e628
fix & improve speed of diagonal matrix multiplication
Jaroslav Hajek <highegg@gmail.com>
parents:
7789
diff
changeset
|
34 template <class R, class S> |
dbe67764e628
fix & improve speed of diagonal matrix multiplication
Jaroslav Hajek <highegg@gmail.com>
parents:
7789
diff
changeset
|
35 inline void |
dbe67764e628
fix & improve speed of diagonal matrix multiplication
Jaroslav Hajek <highegg@gmail.com>
parents:
7789
diff
changeset
|
36 mx_inline_fill_vs (R *r, size_t n, S s) |
dbe67764e628
fix & improve speed of diagonal matrix multiplication
Jaroslav Hajek <highegg@gmail.com>
parents:
7789
diff
changeset
|
37 { |
dbe67764e628
fix & improve speed of diagonal matrix multiplication
Jaroslav Hajek <highegg@gmail.com>
parents:
7789
diff
changeset
|
38 for (size_t i = 0; i < n; i++) |
dbe67764e628
fix & improve speed of diagonal matrix multiplication
Jaroslav Hajek <highegg@gmail.com>
parents:
7789
diff
changeset
|
39 r[i] = s; |
dbe67764e628
fix & improve speed of diagonal matrix multiplication
Jaroslav Hajek <highegg@gmail.com>
parents:
7789
diff
changeset
|
40 } |
dbe67764e628
fix & improve speed of diagonal matrix multiplication
Jaroslav Hajek <highegg@gmail.com>
parents:
7789
diff
changeset
|
41 |
2811 | 42 #define VS_OP_FCN(F, OP) \ |
43 template <class R, class V, class S> \ | |
3262 | 44 inline void \ |
2811 | 45 F ## _vs (R *r, const V *v, size_t n, S s) \ |
46 { \ | |
47 for (size_t i = 0; i < n; i++) \ | |
48 r[i] = v[i] OP s; \ | |
49 } | |
50 | |
3769 | 51 VS_OP_FCN (mx_inline_add, +) |
52 VS_OP_FCN (mx_inline_subtract, -) | |
53 VS_OP_FCN (mx_inline_multiply, *) | |
54 VS_OP_FCN (mx_inline_divide, /) | |
3 | 55 |
2804 | 56 #define VS_OP(F, OP, R, V, S) \ |
57 static inline R * \ | |
58 F (const V *v, size_t n, S s) \ | |
59 { \ | |
60 R *r = 0; \ | |
61 if (n > 0) \ | |
62 { \ | |
63 r = new R [n]; \ | |
2811 | 64 F ## _vs (r, v, n, s); \ |
2804 | 65 } \ |
66 return r; \ | |
67 } | |
3 | 68 |
2804 | 69 #define VS_OPS(R, V, S) \ |
3769 | 70 VS_OP (mx_inline_add, +, R, V, S) \ |
71 VS_OP (mx_inline_subtract, -, R, V, S) \ | |
72 VS_OP (mx_inline_multiply, *, R, V, S) \ | |
73 VS_OP (mx_inline_divide, /, R, V, S) | |
3 | 74 |
2804 | 75 VS_OPS (double, double, double) |
76 VS_OPS (Complex, double, Complex) | |
77 VS_OPS (Complex, Complex, double) | |
78 VS_OPS (Complex, Complex, Complex) | |
3 | 79 |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
80 VS_OPS (float, float, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
81 VS_OPS (FloatComplex, float, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
82 VS_OPS (FloatComplex, FloatComplex, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
83 VS_OPS (FloatComplex, FloatComplex, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
84 |
2811 | 85 #define SV_OP_FCN(F, OP) \ |
86 template <class R, class S, class V> \ | |
3262 | 87 inline void \ |
2811 | 88 F ## _sv (R *r, S s, const V *v, size_t n) \ |
89 { \ | |
90 for (size_t i = 0; i < n; i++) \ | |
91 r[i] = s OP v[i]; \ | |
92 } \ | |
93 | |
3769 | 94 SV_OP_FCN (mx_inline_add, +) |
95 SV_OP_FCN (mx_inline_subtract, -) | |
96 SV_OP_FCN (mx_inline_multiply, *) | |
97 SV_OP_FCN (mx_inline_divide, /) | |
2811 | 98 |
2804 | 99 #define SV_OP(F, OP, R, S, V) \ |
100 static inline R * \ | |
101 F (S s, const V *v, size_t n) \ | |
102 { \ | |
103 R *r = 0; \ | |
104 if (n > 0) \ | |
105 { \ | |
106 r = new R [n]; \ | |
2811 | 107 F ## _sv (r, s, v, n); \ |
2804 | 108 } \ |
109 return r; \ | |
110 } | |
3 | 111 |
2804 | 112 #define SV_OPS(R, S, V) \ |
3769 | 113 SV_OP (mx_inline_add, +, R, S, V) \ |
114 SV_OP (mx_inline_subtract, -, R, S, V) \ | |
115 SV_OP (mx_inline_multiply, *, R, S, V) \ | |
116 SV_OP (mx_inline_divide, /, R, S, V) | |
3 | 117 |
2804 | 118 SV_OPS (double, double, double) |
119 SV_OPS (Complex, double, Complex) | |
120 SV_OPS (Complex, Complex, double) | |
121 SV_OPS (Complex, Complex, Complex) | |
3 | 122 |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
123 SV_OPS (float, float, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
124 SV_OPS (FloatComplex, float, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
125 SV_OPS (FloatComplex, FloatComplex, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
126 SV_OPS (FloatComplex, FloatComplex, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
127 |
2811 | 128 #define VV_OP_FCN(F, OP) \ |
129 template <class R, class T1, class T2> \ | |
3262 | 130 inline void \ |
2811 | 131 F ## _vv (R *r, const T1 *v1, const T2 *v2, size_t n) \ |
132 { \ | |
133 for (size_t i = 0; i < n; i++) \ | |
134 r[i] = v1[i] OP v2[i]; \ | |
135 } \ | |
136 | |
3769 | 137 VV_OP_FCN (mx_inline_add, +) |
138 VV_OP_FCN (mx_inline_subtract, -) | |
139 VV_OP_FCN (mx_inline_multiply, *) | |
140 VV_OP_FCN (mx_inline_divide, /) | |
2811 | 141 |
2804 | 142 #define VV_OP(F, OP, R, T1, T2) \ |
143 static inline R * \ | |
144 F (const T1 *v1, const T2 *v2, size_t n) \ | |
145 { \ | |
146 R *r = 0; \ | |
147 if (n > 0) \ | |
148 { \ | |
149 r = new R [n]; \ | |
2811 | 150 F ## _vv (r, v1, v2, n); \ |
2804 | 151 } \ |
152 return r; \ | |
153 } | |
3 | 154 |
2804 | 155 #define VV_OPS(R, T1, T2) \ |
3769 | 156 VV_OP (mx_inline_add, +, R, T1, T2) \ |
157 VV_OP (mx_inline_subtract, -, R, T1, T2) \ | |
158 VV_OP (mx_inline_multiply, *, R, T1, T2) \ | |
159 VV_OP (mx_inline_divide, /, R, T1, T2) | |
3 | 160 |
2804 | 161 VV_OPS (double, double, double) |
162 VV_OPS (Complex, double, Complex) | |
163 VV_OPS (Complex, Complex, double) | |
164 VV_OPS (Complex, Complex, Complex) | |
3 | 165 |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
166 VV_OPS (float, float, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
167 VV_OPS (FloatComplex, float, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
168 VV_OPS (FloatComplex, FloatComplex, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
169 VV_OPS (FloatComplex, FloatComplex, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
170 |
2804 | 171 #define VS_OP2(F, OP, V, S) \ |
172 static inline V * \ | |
173 F (V *v, size_t n, S s) \ | |
174 { \ | |
175 for (size_t i = 0; i < n; i++) \ | |
176 v[i] OP s; \ | |
177 return v; \ | |
178 } | |
3 | 179 |
2804 | 180 #define VS_OP2S(V, S) \ |
3769 | 181 VS_OP2 (mx_inline_add2, +=, V, S) \ |
182 VS_OP2 (mx_inline_subtract2, -=, V, S) \ | |
183 VS_OP2 (mx_inline_multiply2, *=, V, S) \ | |
184 VS_OP2 (mx_inline_divide2, /=, V, S) \ | |
185 VS_OP2 (mx_inline_copy, =, V, S) | |
3 | 186 |
2804 | 187 VS_OP2S (double, double) |
188 VS_OP2S (Complex, double) | |
189 VS_OP2S (Complex, Complex) | |
3 | 190 |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
191 VS_OP2S (float, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
192 VS_OP2S (FloatComplex, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
193 VS_OP2S (FloatComplex, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
194 |
2804 | 195 #define VV_OP2(F, OP, T1, T2) \ |
196 static inline T1 * \ | |
197 F (T1 *v1, const T2 *v2, size_t n) \ | |
198 { \ | |
199 for (size_t i = 0; i < n; i++) \ | |
200 v1[i] OP v2[i]; \ | |
201 return v1; \ | |
202 } | |
3 | 203 |
2804 | 204 #define VV_OP2S(T1, T2) \ |
3769 | 205 VV_OP2 (mx_inline_add2, +=, T1, T2) \ |
206 VV_OP2 (mx_inline_subtract2, -=, T1, T2) \ | |
207 VV_OP2 (mx_inline_multiply2, *=, T1, T2) \ | |
208 VV_OP2 (mx_inline_divide2, /=, T1, T2) \ | |
209 VV_OP2 (mx_inline_copy, =, T1, T2) | |
3 | 210 |
2804 | 211 VV_OP2S (double, double) |
212 VV_OP2S (Complex, double) | |
213 VV_OP2S (Complex, Complex) | |
3 | 214 |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
215 VV_OP2S (float, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
216 VV_OP2S (FloatComplex, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
217 VV_OP2S (FloatComplex, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
218 |
2804 | 219 #define OP_EQ_FCN(T1, T2) \ |
220 static inline bool \ | |
3769 | 221 mx_inline_equal (const T1 *x, const T2 *y, size_t n) \ |
2804 | 222 { \ |
223 for (size_t i = 0; i < n; i++) \ | |
224 if (x[i] != y[i]) \ | |
225 return false; \ | |
226 return true; \ | |
227 } | |
3 | 228 |
2828 | 229 OP_EQ_FCN (bool, bool) |
2804 | 230 OP_EQ_FCN (char, char) |
231 OP_EQ_FCN (double, double) | |
232 OP_EQ_FCN (Complex, Complex) | |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
233 OP_EQ_FCN (float, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
234 OP_EQ_FCN (FloatComplex, FloatComplex) |
3 | 235 |
2804 | 236 #define OP_DUP_FCN(OP, F, R, T) \ |
237 static inline R * \ | |
238 F (const T *x, size_t n) \ | |
239 { \ | |
240 R *r = 0; \ | |
241 if (n > 0) \ | |
242 { \ | |
243 r = new R [n]; \ | |
244 for (size_t i = 0; i < n; i++) \ | |
245 r[i] = OP (x[i]); \ | |
246 } \ | |
247 return r; \ | |
248 } | |
3 | 249 |
3769 | 250 OP_DUP_FCN (, mx_inline_dup, double, double) |
251 OP_DUP_FCN (, mx_inline_dup, Complex, Complex) | |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
252 OP_DUP_FCN (, mx_inline_dup, float, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
253 OP_DUP_FCN (, mx_inline_dup, FloatComplex, FloatComplex) |
3 | 254 |
2804 | 255 // These should really return a bool *. Also, they should probably be |
256 // in with a collection of other element-by-element boolean ops. | |
3769 | 257 OP_DUP_FCN (0.0 ==, mx_inline_not, double, double) |
258 OP_DUP_FCN (0.0 ==, mx_inline_not, double, Complex) | |
3 | 259 |
3769 | 260 OP_DUP_FCN (, mx_inline_make_complex, Complex, double) |
2804 | 261 |
3769 | 262 OP_DUP_FCN (-, mx_inline_change_sign, double, double) |
263 OP_DUP_FCN (-, mx_inline_change_sign, Complex, Complex) | |
3 | 264 |
8650
a1ae2aae903e
abs,real,imag,conj: use code from mx-inlines rather than the generic map
Jaroslav Hajek <highegg@gmail.com>
parents:
8380
diff
changeset
|
265 OP_DUP_FCN (std::abs, mx_inline_fabs_dup, double, double) |
a1ae2aae903e
abs,real,imag,conj: use code from mx-inlines rather than the generic map
Jaroslav Hajek <highegg@gmail.com>
parents:
8380
diff
changeset
|
266 OP_DUP_FCN (std::abs, mx_inline_cabs_dup, double, Complex) |
3769 | 267 OP_DUP_FCN (real, mx_inline_real_dup, double, Complex) |
268 OP_DUP_FCN (imag, mx_inline_imag_dup, double, Complex) | |
269 OP_DUP_FCN (conj, mx_inline_conj_dup, Complex, Complex) | |
3 | 270 |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
271 OP_DUP_FCN (0.0 ==, mx_inline_not, float, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
272 OP_DUP_FCN (static_cast<float>(0.0) ==, mx_inline_not, float, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
273 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
274 OP_DUP_FCN (, mx_inline_make_complex, FloatComplex, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
275 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
276 OP_DUP_FCN (-, mx_inline_change_sign, float, float) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
277 OP_DUP_FCN (-, mx_inline_change_sign, FloatComplex, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
278 |
8650
a1ae2aae903e
abs,real,imag,conj: use code from mx-inlines rather than the generic map
Jaroslav Hajek <highegg@gmail.com>
parents:
8380
diff
changeset
|
279 OP_DUP_FCN (std::abs, mx_inline_fabs_dup, float, float) |
a1ae2aae903e
abs,real,imag,conj: use code from mx-inlines rather than the generic map
Jaroslav Hajek <highegg@gmail.com>
parents:
8380
diff
changeset
|
280 OP_DUP_FCN (std::abs, mx_inline_cabs_dup, float, FloatComplex) |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
281 OP_DUP_FCN (real, mx_inline_real_dup, float, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
282 OP_DUP_FCN (imag, mx_inline_imag_dup, float, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
283 OP_DUP_FCN (conj, mx_inline_conj_dup, FloatComplex, FloatComplex) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
284 |
8736
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
285 // NOTE: std::norm is NOT equivalent |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
286 template <class T> |
8743
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
287 inline T cabsq (const std::complex<T>& c) |
8736
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
288 { return c.real () * c.real () + c.imag () * c.imag (); } |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
289 |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
290 #define OP_RED_SUM(ac, el) ac += el |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
291 #define OP_RED_PROD(ac, el) ac *= el |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
292 #define OP_RED_SUMSQ(ac, el) ac += el*el |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
293 #define OP_RED_SUMSQC(ac, el) ac += cabsq (el) |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
294 |
8743
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
295 // default. works for integers and bool. |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
296 template <class T> |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
297 inline bool xis_true (T x) { return x; } |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
298 template <class T> |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
299 inline bool xis_false (T x) { return ! x; } |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
300 // for octave_ints |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
301 template <class T> |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
302 inline bool xis_true (const octave_int<T>& x) { return x.value (); } |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
303 template <class T> |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
304 inline bool xis_false (const octave_int<T>& x) { return ! x.value (); } |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
305 // for reals, we want to ignore NaNs. |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
306 inline bool xis_true (double x) { return ! xisnan (x) && x != 0.0; } |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
307 inline bool xis_false (double x) { return x == 0.0; } |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
308 inline bool xis_true (float x) { return ! xisnan (x) && x != 0.0f; } |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
309 inline bool xis_false (float x) { return x == 0.0f; } |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
310 // Ditto for complex. |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
311 inline bool xis_true (const Complex& x) { return ! xisnan (x) && x != 0.0; } |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
312 inline bool xis_false (const Complex& x) { return x == 0.0; } |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
313 inline bool xis_true (const FloatComplex& x) { return ! xisnan (x) && x != 0.0f; } |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
314 inline bool xis_false (const FloatComplex& x) { return x == 0.0f; } |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
315 |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
316 // The following two implement a simple short-circuiting. |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
317 #define OP_RED_ANYC(ac, el) if (xis_true (el)) { ac = true; break; } else continue |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
318 #define OP_RED_ALLC(ac, el) if (xis_false (el)) { ac = false; break; } else continue |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
319 |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
320 // Row any/all reductions are a tradeoff - we traverse the array by |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
321 // columns to gain cache coherence, but sacrifice short-circuiting for that. |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
322 // For certain logical arrays, this could mean a significant loss. |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
323 // A more sophisticated implementation could introduce a buffer of active |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
324 // row indices to achieve both. Right now, I don't see the operation as |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
325 // important enough. |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
326 |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
327 #define OP_RED_ANYR(ac, el) if (xis_true (el)) ac = true |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
328 #define OP_RED_ALLR(ac, el) if (xis_false (el)) ac = false |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
329 |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
330 #define OP_RED_FCN(F, TSRC, TRES, OP, ZERO) \ |
8736
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
331 template <class T> \ |
8743
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
332 inline TRES \ |
8736
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
333 F (const TSRC* v, octave_idx_type n) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
334 { \ |
8743
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
335 TRES ac = ZERO; \ |
8736
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
336 for (octave_idx_type i = 0; i < n; i++) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
337 OP(ac, v[i]); \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
338 return ac; \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
339 } |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
340 |
8743
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
341 OP_RED_FCN (mx_inline_sum, T, T, OP_RED_SUM, 0) |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
342 OP_RED_FCN (mx_inline_prod, T, T, OP_RED_PROD, 1) |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
343 OP_RED_FCN (mx_inline_sumsq, T, T, OP_RED_SUMSQ, 0) |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
344 OP_RED_FCN (mx_inline_sumsq, std::complex<T>, T, OP_RED_SUMSQC, 0) |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
345 OP_RED_FCN (mx_inline_any, T, bool, OP_RED_ANYC, false) |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
346 OP_RED_FCN (mx_inline_all, T, bool, OP_RED_ALLC, true) |
8736
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
347 |
8743
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
348 |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
349 #define OP_RED_FCN2(F, TSRC, TRES, OP, ZERO) \ |
8736
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
350 template <class T> \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
351 inline void \ |
8743
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
352 F (const TSRC* v, TRES *r, octave_idx_type m, octave_idx_type n) \ |
8736
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
353 { \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
354 for (octave_idx_type i = 0; i < m; i++) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
355 r[i] = ZERO; \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
356 for (octave_idx_type j = 0; j < n; j++) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
357 { \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
358 for (octave_idx_type i = 0; i < m; i++) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
359 OP(r[i], v[i]); \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
360 v += m; \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
361 } \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
362 } |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
363 |
8743
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
364 OP_RED_FCN2 (mx_inline_sum, T, T, OP_RED_SUM, 0) |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
365 OP_RED_FCN2 (mx_inline_prod, T, T, OP_RED_PROD, 1) |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
366 OP_RED_FCN2 (mx_inline_sumsq, T, T, OP_RED_SUMSQ, 0) |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
367 OP_RED_FCN2 (mx_inline_sumsq, std::complex<T>, T, OP_RED_SUMSQC, 0) |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
368 OP_RED_FCN2 (mx_inline_any, T, bool, OP_RED_ANYR, false) |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
369 OP_RED_FCN2 (mx_inline_all, T, bool, OP_RED_ALLR, true) |
8736
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
370 |
8743
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
371 #define OP_RED_FCNN(F, TSRC, TRES) \ |
8736
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
372 template <class T> \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
373 inline void \ |
8743
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
374 F (const TSRC *v, TRES *r, octave_idx_type l, \ |
8736
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
375 octave_idx_type n, octave_idx_type u) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
376 { \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
377 if (l == 1) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
378 { \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
379 for (octave_idx_type i = 0; i < u; i++) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
380 { \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
381 r[i] = F (v, n); \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
382 v += n; \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
383 } \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
384 } \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
385 else \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
386 { \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
387 for (octave_idx_type i = 0; i < u; i++) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
388 { \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
389 F (v, r, l, n); \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
390 v += l*n; \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
391 r += l; \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
392 } \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
393 } \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
394 } |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
395 |
8743
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
396 OP_RED_FCNN (mx_inline_sum, T, T) |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
397 OP_RED_FCNN (mx_inline_prod, T, T) |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
398 OP_RED_FCNN (mx_inline_sumsq, T, T) |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
399 OP_RED_FCNN (mx_inline_sumsq, std::complex<T>, T) |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
400 OP_RED_FCNN (mx_inline_any, T, bool) |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
401 OP_RED_FCNN (mx_inline_all, T, bool) |
8736
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
402 |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
403 #define OP_CUM_FCN(F, OP) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
404 template <class T> \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
405 inline void \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
406 F (const T *v, T *r, octave_idx_type n) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
407 { \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
408 if (n) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
409 { \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
410 T t = r[0] = v[0]; \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
411 for (octave_idx_type i = 1; i < n; i++) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
412 r[i] = t = t OP v[i]; \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
413 } \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
414 } |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
415 |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
416 OP_CUM_FCN (mx_inline_cumsum, +) |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
417 OP_CUM_FCN (mx_inline_cumprod, *) |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
418 |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
419 #define OP_CUM_FCN2(F, OP) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
420 template <class T> \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
421 inline void \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
422 F (const T *v, T *r, octave_idx_type m, octave_idx_type n) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
423 { \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
424 if (n) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
425 { \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
426 for (octave_idx_type i = 0; i < m; i++) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
427 r[i] = v[i]; \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
428 const T *r0 = r; \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
429 for (octave_idx_type j = 1; j < n; j++) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
430 { \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
431 r += m; v += m; \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
432 for (octave_idx_type i = 0; i < m; i++) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
433 r[i] = v[i] OP r0[i]; \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
434 r0 += m; \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
435 } \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
436 } \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
437 } |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
438 |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
439 OP_CUM_FCN2 (mx_inline_cumsum, +) |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
440 OP_CUM_FCN2 (mx_inline_cumprod, *) |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
441 |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
442 #define OP_CUM_FCNN(F) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
443 template <class T> \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
444 inline void \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
445 F (const T *v, T *r, octave_idx_type l, \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
446 octave_idx_type n, octave_idx_type u) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
447 { \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
448 if (l == 1) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
449 { \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
450 for (octave_idx_type i = 0; i < u; i++) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
451 { \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
452 F (v, r, n); \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
453 v += n; r += n; \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
454 } \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
455 } \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
456 else \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
457 { \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
458 for (octave_idx_type i = 0; i < u; i++) \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
459 { \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
460 F (v, r, l, n); \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
461 v += l*n; \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
462 r += l*n; \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
463 } \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
464 } \ |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
465 } |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
466 |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
467 OP_CUM_FCNN (mx_inline_cumsum) |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
468 OP_CUM_FCNN (mx_inline_cumprod) |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
469 |
8751
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
470 #define OP_MINMAX_FCN(F, OP) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
471 template <class T> \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
472 void F (const T *v, T *r, octave_idx_type n) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
473 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
474 if (! n) return; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
475 T tmp = v[0]; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
476 octave_idx_type i = 1; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
477 while (xisnan (tmp) && i < n) tmp = v[i++]; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
478 for (i = 1; i < n; i++) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
479 if (v[i] OP tmp) tmp = v[i]; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
480 *r = tmp; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
481 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
482 template <class T> \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
483 void F (const T *v, T *r, octave_idx_type *ri, octave_idx_type n) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
484 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
485 if (! n) return; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
486 T tmp = v[0]; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
487 octave_idx_type tmpi = 0; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
488 octave_idx_type i = 1; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
489 while (xisnan (tmp) && i < n) tmp = v[i++]; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
490 for (i = 1; i < n; i++) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
491 if (v[i] OP tmp) { tmp = v[i]; tmpi = i; }\ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
492 *r = tmp; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
493 *ri = tmpi; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
494 } |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
495 |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
496 OP_MINMAX_FCN (mx_inline_min, <) |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
497 OP_MINMAX_FCN (mx_inline_max, >) |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
498 |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
499 // Row reductions will be slightly complicated. We will proceed with checks |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
500 // for NaNs until we detect that no row will yield a NaN, in which case we |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
501 // proceed to a faster code. |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
502 |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
503 #define OP_MINMAX_FCN2(F, OP) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
504 template <class T> \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
505 inline void \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
506 F (const T *v, T *r, octave_idx_type m, octave_idx_type n) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
507 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
508 if (! n) return; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
509 bool nan = false; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
510 octave_idx_type j = 0; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
511 for (octave_idx_type i = 0; i < m; i++) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
512 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
513 r[i] = v[i]; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
514 if (xisnan (v[i])) nan = true; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
515 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
516 j++; v += m; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
517 while (nan && j < n) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
518 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
519 nan = false; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
520 for (octave_idx_type i = 0; i < m; i++) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
521 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
522 if (xisnan (v[i])) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
523 nan = true; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
524 else if (xisnan (r[i]) || v[i] OP r[i]) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
525 r[i] = v[i]; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
526 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
527 j++; v += m; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
528 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
529 while (j < n) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
530 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
531 for (octave_idx_type i = 0; i < m; i++) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
532 if (v[i] OP r[i]) r[i] = v[i]; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
533 j++; v += m; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
534 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
535 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
536 template <class T> \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
537 inline void \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
538 F (const T *v, T *r, octave_idx_type *ri, \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
539 octave_idx_type m, octave_idx_type n) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
540 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
541 if (! n) return; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
542 bool nan = false; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
543 octave_idx_type j = 0; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
544 for (octave_idx_type i = 0; i < m; i++) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
545 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
546 r[i] = v[i]; ri[i] = j; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
547 if (xisnan (v[i])) nan = true; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
548 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
549 j++; v += m; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
550 while (nan && j < n) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
551 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
552 nan = false; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
553 for (octave_idx_type i = 0; i < m; i++) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
554 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
555 if (xisnan (v[i])) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
556 nan = true; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
557 else if (xisnan (r[i]) || v[i] OP r[i]) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
558 { r[i] = v[i]; ri[i] = j; } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
559 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
560 j++; v += m; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
561 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
562 while (j < n) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
563 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
564 for (octave_idx_type i = 0; i < m; i++) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
565 if (v[i] OP r[i]) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
566 { r[i] = v[i]; ri[i] = j; } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
567 j++; v += m; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
568 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
569 } |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
570 |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
571 OP_MINMAX_FCN2 (mx_inline_min, <) |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
572 OP_MINMAX_FCN2 (mx_inline_max, >) |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
573 |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
574 #define OP_MINMAX_FCNN(F) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
575 template <class T> \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
576 inline void \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
577 F (const T *v, T *r, octave_idx_type l, \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
578 octave_idx_type n, octave_idx_type u) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
579 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
580 if (! n) return; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
581 if (l == 1) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
582 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
583 for (octave_idx_type i = 0; i < u; i++) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
584 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
585 F (v, r, n); \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
586 v += n; r++; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
587 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
588 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
589 else \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
590 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
591 for (octave_idx_type i = 0; i < u; i++) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
592 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
593 F (v, r, l, n); \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
594 v += l*n; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
595 r += l; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
596 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
597 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
598 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
599 template <class T> \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
600 inline void \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
601 F (const T *v, T *r, octave_idx_type *ri, \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
602 octave_idx_type l, octave_idx_type n, octave_idx_type u) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
603 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
604 if (! n) return; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
605 if (l == 1) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
606 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
607 for (octave_idx_type i = 0; i < u; i++) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
608 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
609 F (v, r, ri, n); \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
610 v += n; r++; ri++; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
611 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
612 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
613 else \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
614 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
615 for (octave_idx_type i = 0; i < u; i++) \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
616 { \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
617 F (v, r, ri, l, n); \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
618 v += l*n; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
619 r += l; ri += l; \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
620 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
621 } \ |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
622 } |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
623 |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
624 OP_MINMAX_FCNN (mx_inline_min) |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
625 OP_MINMAX_FCNN (mx_inline_max) |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
626 |
8736
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
627 // Assistant function |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
628 |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
629 inline void |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
630 get_extent_triplet (const dim_vector& dims, int& dim, |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
631 octave_idx_type& l, octave_idx_type& n, |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
632 octave_idx_type& u) |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
633 { |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
634 octave_idx_type ndims = dims.length (); |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
635 if (dim >= ndims) |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
636 { |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
637 l = dims.numel (); |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
638 n = 1; |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
639 u = 1; |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
640 } |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
641 else |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
642 { |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
643 if (dim < 0) |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
644 { |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
645 // find first non-singleton dim |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
646 for (dim = 0; dims(dim) == 1 && dim < ndims - 1; dim++) ; |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
647 } |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
648 // calculate extent triplet. |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
649 l = 1, n = dims(dim), u = 1; |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
650 for (octave_idx_type i = 0; i < dim; i++) |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
651 l *= dims (i); |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
652 for (octave_idx_type i = dim + 1; i < ndims; i++) |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
653 u *= dims (i); |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
654 } |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
655 } |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
656 |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
657 // Appliers. |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
658 // FIXME: is this the best design? C++ gives a lot of options here... |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
659 // maybe it can be done without an explicit parameter? |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
660 |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
661 template <class ArrayType, class T> |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
662 inline ArrayType |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
663 do_mx_red_op (const Array<T>& src, int dim, |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
664 void (*mx_red_op) (const T *, typename ArrayType::element_type *, |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
665 octave_idx_type, octave_idx_type, octave_idx_type)) |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
666 { |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
667 octave_idx_type l, n, u; |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
668 dim_vector dims = src.dims (); |
8743
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
669 // M*b inconsistency: sum([]) = 0 etc. |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
670 if (dims.length () == 2 && dims(0) == 0 && dims(1) == 0) |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
671 dims (1) = 1; |
1bd918cfb6e2
reimplement any & all using the new reduction code
Jaroslav Hajek <highegg@gmail.com>
parents:
8736
diff
changeset
|
672 |
8736
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
673 get_extent_triplet (dims, dim, l, n, u); |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
674 |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
675 // Reduction operation reduces the array size. |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
676 if (dim < dims.length ()) dims(dim) = 1; |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
677 dims.chop_trailing_singletons (); |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
678 |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
679 ArrayType ret (dims); |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
680 mx_red_op (src.data (), ret.fortran_vec (), l, n, u); |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
681 |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
682 return ret; |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
683 } |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
684 |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
685 template <class ArrayType, class T> |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
686 inline ArrayType |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
687 do_mx_cum_op (const Array<T>& src, int dim, |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
688 void (*mx_cum_op) (const T *, typename ArrayType::element_type *, |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
689 octave_idx_type, octave_idx_type, octave_idx_type)) |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
690 { |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
691 octave_idx_type l, n, u; |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
692 dim_vector dims = src.dims (); |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
693 get_extent_triplet (dims, dim, l, n, u); |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
694 |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
695 // Cumulative operation doesn't reduce the array size. |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
696 ArrayType ret (dims); |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
697 mx_cum_op (src.data (), ret.fortran_vec (), l, n, u); |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
698 |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
699 return ret; |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
700 } |
53b4fdeacc2e
improve reduction functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8650
diff
changeset
|
701 |
8751
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
702 template <class ArrayType> |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
703 inline ArrayType |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
704 do_mx_minmax_op (const ArrayType& src, int dim, |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
705 void (*mx_minmax_op) (const typename ArrayType::element_type *, |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
706 typename ArrayType::element_type *, |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
707 octave_idx_type, octave_idx_type, octave_idx_type)) |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
708 { |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
709 octave_idx_type l, n, u; |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
710 dim_vector dims = src.dims (); |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
711 get_extent_triplet (dims, dim, l, n, u); |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
712 |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
713 // If the dimension is zero, we don't do anything. |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
714 if (dim < dims.length () && dims(dim) != 0) dims(dim) = 1; |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
715 dims.chop_trailing_singletons (); |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
716 |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
717 ArrayType ret (dims); |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
718 mx_minmax_op (src.data (), ret.fortran_vec (), l, n, u); |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
719 |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
720 return ret; |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
721 } |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
722 |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
723 template <class ArrayType> |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
724 inline ArrayType |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
725 do_mx_minmax_op (const ArrayType& src, Array<octave_idx_type>& idx, int dim, |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
726 void (*mx_minmax_op) (const typename ArrayType::element_type *, |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
727 typename ArrayType::element_type *, |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
728 octave_idx_type *, |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
729 octave_idx_type, octave_idx_type, octave_idx_type)) |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
730 { |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
731 octave_idx_type l, n, u; |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
732 dim_vector dims = src.dims (); |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
733 get_extent_triplet (dims, dim, l, n, u); |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
734 |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
735 // If the dimension is zero, we don't do anything. |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
736 if (dim < dims.length () && dims(dim) != 0) dims(dim) = 1; |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
737 dims.chop_trailing_singletons (); |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
738 |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
739 ArrayType ret (dims); |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
740 if (idx.dims () != dims) idx = Array<octave_idx_type> (dims); |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
741 |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
742 mx_minmax_op (src.data (), ret.fortran_vec (), idx.fortran_vec (), |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
743 l, n, u); |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
744 |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
745 return ret; |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
746 } |
9f7ce4bf7650
optimize min/max functions
Jaroslav Hajek <highegg@gmail.com>
parents:
8743
diff
changeset
|
747 |
3864 | 748 // Avoid some code duplication. Maybe we should use templates. |
749 | |
4015 | 750 #define MX_CUMULATIVE_OP(RET_TYPE, ELT_TYPE, OP) \ |
3864 | 751 \ |
5275 | 752 octave_idx_type nr = rows (); \ |
753 octave_idx_type nc = cols (); \ | |
3864 | 754 \ |
755 RET_TYPE retval (nr, nc); \ | |
756 \ | |
757 if (nr > 0 && nc > 0) \ | |
758 { \ | |
759 if ((nr == 1 && dim == -1) || dim == 1) \ | |
760 { \ | |
5275 | 761 for (octave_idx_type i = 0; i < nr; i++) \ |
3864 | 762 { \ |
763 ELT_TYPE t = elem (i, 0); \ | |
5275 | 764 for (octave_idx_type j = 0; j < nc; j++) \ |
3864 | 765 { \ |
766 retval.elem (i, j) = t; \ | |
767 if (j < nc - 1) \ | |
768 t OP elem (i, j+1); \ | |
769 } \ | |
770 } \ | |
771 } \ | |
772 else \ | |
773 { \ | |
5275 | 774 for (octave_idx_type j = 0; j < nc; j++) \ |
3864 | 775 { \ |
776 ELT_TYPE t = elem (0, j); \ | |
5275 | 777 for (octave_idx_type i = 0; i < nr; i++) \ |
3864 | 778 { \ |
779 retval.elem (i, j) = t; \ | |
780 if (i < nr - 1) \ | |
781 t OP elem (i+1, j); \ | |
782 } \ | |
783 } \ | |
784 } \ | |
785 } \ | |
786 \ | |
787 return retval | |
788 | |
789 #define MX_BASE_REDUCTION_OP(RET_TYPE, ROW_EXPR, COL_EXPR, INIT_VAL, \ | |
790 MT_RESULT) \ | |
791 \ | |
5275 | 792 octave_idx_type nr = rows (); \ |
793 octave_idx_type nc = cols (); \ | |
3864 | 794 \ |
795 RET_TYPE retval; \ | |
796 \ | |
797 if (nr > 0 && nc > 0) \ | |
798 { \ | |
799 if ((nr == 1 && dim == -1) || dim == 1) \ | |
800 { \ | |
801 retval.resize (nr, 1); \ | |
5275 | 802 for (octave_idx_type i = 0; i < nr; i++) \ |
3864 | 803 { \ |
804 retval.elem (i, 0) = INIT_VAL; \ | |
5275 | 805 for (octave_idx_type j = 0; j < nc; j++) \ |
3864 | 806 { \ |
807 ROW_EXPR; \ | |
808 } \ | |
809 } \ | |
810 } \ | |
811 else \ | |
812 { \ | |
813 retval.resize (1, nc); \ | |
5275 | 814 for (octave_idx_type j = 0; j < nc; j++) \ |
3864 | 815 { \ |
816 retval.elem (0, j) = INIT_VAL; \ | |
5275 | 817 for (octave_idx_type i = 0; i < nr; i++) \ |
3864 | 818 { \ |
819 COL_EXPR; \ | |
820 } \ | |
821 } \ | |
822 } \ | |
823 } \ | |
4139 | 824 else if (nc == 0 && (nr == 0 || (nr == 1 && dim == -1))) \ |
825 retval.resize (1, 1, MT_RESULT); \ | |
4015 | 826 else if (nr == 0 && (dim == 0 || dim == -1)) \ |
827 retval.resize (1, nc, MT_RESULT); \ | |
828 else if (nc == 0 && dim == 1) \ | |
829 retval.resize (nr, 1, MT_RESULT); \ | |
830 else \ | |
4139 | 831 retval.resize (nr > 0, nc > 0); \ |
3864 | 832 \ |
833 return retval | |
834 | |
835 #define MX_REDUCTION_OP_ROW_EXPR(OP) \ | |
836 retval.elem (i, 0) OP elem (i, j) | |
837 | |
838 #define MX_REDUCTION_OP_COL_EXPR(OP) \ | |
839 retval.elem (0, j) OP elem (i, j) | |
840 | |
841 #define MX_REDUCTION_OP(RET_TYPE, OP, INIT_VAL, MT_RESULT) \ | |
842 MX_BASE_REDUCTION_OP (RET_TYPE, \ | |
843 MX_REDUCTION_OP_ROW_EXPR (OP), \ | |
844 MX_REDUCTION_OP_COL_EXPR (OP), \ | |
845 INIT_VAL, MT_RESULT) | |
4015 | 846 |
847 #define MX_ANY_ALL_OP_ROW_CODE(TEST_OP, TEST_TRUE_VAL) \ | |
848 if (elem (i, j) TEST_OP 0.0) \ | |
849 { \ | |
850 retval.elem (i, 0) = TEST_TRUE_VAL; \ | |
851 break; \ | |
852 } | |
853 | |
854 #define MX_ANY_ALL_OP_COL_CODE(TEST_OP, TEST_TRUE_VAL) \ | |
855 if (elem (i, j) TEST_OP 0.0) \ | |
856 { \ | |
857 retval.elem (0, j) = TEST_TRUE_VAL; \ | |
858 break; \ | |
859 } | |
860 | |
861 #define MX_ANY_ALL_OP(DIM, INIT_VAL, TEST_OP, TEST_TRUE_VAL) \ | |
862 MX_BASE_REDUCTION_OP (boolMatrix, \ | |
863 MX_ANY_ALL_OP_ROW_CODE (TEST_OP, TEST_TRUE_VAL), \ | |
864 MX_ANY_ALL_OP_COL_CODE (TEST_OP, TEST_TRUE_VAL), \ | |
865 INIT_VAL, INIT_VAL) | |
866 | |
867 #define MX_ALL_OP(DIM) MX_ANY_ALL_OP (DIM, true, ==, false) | |
868 | |
869 #define MX_ANY_OP(DIM) MX_ANY_ALL_OP (DIM, false, !=, true) | |
870 | |
4563 | 871 #define MX_ND_ALL_EXPR elem (iter_idx) == 0 |
4556 | 872 |
4563 | 873 #define MX_ND_ANY_EXPR elem (iter_idx) != 0 |
4556 | 874 |
875 #define MX_ND_ALL_EVAL(TEST_EXPR) \ | |
5520 | 876 if (retval(result_idx) && (TEST_EXPR)) \ |
877 retval(result_idx) = 0; | |
4556 | 878 |
879 #define MX_ND_ANY_EVAL(TEST_EXPR) \ | |
5520 | 880 if (retval(result_idx) || (TEST_EXPR)) \ |
881 retval(result_idx) = 1; | |
4569 | 882 |
5520 | 883 #define MX_ND_REDUCTION(EVAL_EXPR, INIT_VAL, RET_TYPE) \ |
4556 | 884 \ |
4569 | 885 RET_TYPE retval; \ |
4556 | 886 \ |
4932 | 887 dim_vector dv = this->dims (); \ |
5520 | 888 int nd = this->ndims (); \ |
4563 | 889 \ |
5955 | 890 int empty = false; \ |
4563 | 891 \ |
5520 | 892 for (int i = 0; i < nd; i++) \ |
4563 | 893 { \ |
5955 | 894 if (dv(i) == 0) \ |
4563 | 895 { \ |
5955 | 896 empty = true; \ |
4563 | 897 break; \ |
898 } \ | |
899 } \ | |
900 \ | |
5972 | 901 if (nd == 2 && dv(0) == 0 && dv(1) == 0) \ |
902 { \ | |
903 retval.resize (dim_vector (1, 1), INIT_VAL); \ | |
904 return retval; \ | |
905 } \ | |
906 \ | |
5520 | 907 /* We need to find first non-singleton dim. */ \ |
908 \ | |
909 if (dim == -1) \ | |
4556 | 910 { \ |
5520 | 911 dim = 0; \ |
912 \ | |
913 for (int i = 0; i < nd; i++) \ | |
4563 | 914 { \ |
5520 | 915 if (dv(i) != 1) \ |
4563 | 916 { \ |
917 dim = i; \ | |
918 break; \ | |
919 } \ | |
920 } \ | |
921 } \ | |
5520 | 922 else if (dim >= nd) \ |
4563 | 923 { \ |
5520 | 924 dim = nd++; \ |
925 dv.resize (nd, 1); \ | |
4563 | 926 } \ |
927 \ | |
5523 | 928 /* R = op (A, DIM) \ |
4563 | 929 \ |
5523 | 930 The strategy here is to access the elements of A along the \ |
931 dimension specified by DIM. This means that we loop over each \ | |
5615 | 932 element of R and adjust the index into A as needed. Store the \ |
933 cummulative product of all dimensions of A in CP_SZ. The last \ | |
934 element of CP_SZ is the total number of elements of A. */ \ | |
4563 | 935 \ |
5615 | 936 Array<octave_idx_type> cp_sz (nd+1, 1); \ |
937 for (int i = 1; i <= nd; i++) \ | |
5523 | 938 cp_sz(i) = cp_sz(i-1)*dv(i-1); \ |
5520 | 939 \ |
5523 | 940 octave_idx_type reset_at = cp_sz(dim); \ |
941 octave_idx_type base_incr = cp_sz(dim+1); \ | |
942 octave_idx_type incr = cp_sz(dim); \ | |
943 octave_idx_type base = 0; \ | |
944 octave_idx_type next_base = base + base_incr; \ | |
945 octave_idx_type iter_idx = base; \ | |
946 octave_idx_type n_elts = dv(dim); \ | |
4556 | 947 \ |
5520 | 948 dv(dim) = 1; \ |
4556 | 949 \ |
5520 | 950 retval.resize (dv, INIT_VAL); \ |
4556 | 951 \ |
5955 | 952 if (! empty) \ |
4556 | 953 { \ |
5955 | 954 octave_idx_type nel = dv.numel (); \ |
5523 | 955 \ |
5955 | 956 octave_idx_type k = 1; \ |
957 \ | |
958 for (octave_idx_type result_idx = 0; result_idx < nel; result_idx++) \ | |
5523 | 959 { \ |
5955 | 960 OCTAVE_QUIT; \ |
5523 | 961 \ |
5955 | 962 for (octave_idx_type j = 0; j < n_elts; j++) \ |
963 { \ | |
964 OCTAVE_QUIT; \ | |
965 \ | |
966 EVAL_EXPR; \ | |
5520 | 967 \ |
5955 | 968 iter_idx += incr; \ |
969 } \ | |
5523 | 970 \ |
5955 | 971 if (k == reset_at) \ |
972 { \ | |
973 base = next_base; \ | |
974 next_base += base_incr; \ | |
975 iter_idx = base; \ | |
976 k = 1; \ | |
977 } \ | |
978 else \ | |
979 { \ | |
980 base++; \ | |
981 iter_idx = base; \ | |
982 k++; \ | |
983 } \ | |
984 } \ | |
4556 | 985 } \ |
986 \ | |
4871 | 987 retval.chop_trailing_singletons (); \ |
988 \ | |
4556 | 989 return retval |
4569 | 990 |
991 #define MX_ND_REAL_OP_REDUCTION(ASN_EXPR, INIT_VAL) \ | |
5520 | 992 MX_ND_REDUCTION (retval(result_idx) ASN_EXPR, INIT_VAL, NDArray) |
4569 | 993 |
994 #define MX_ND_COMPLEX_OP_REDUCTION(ASN_EXPR, INIT_VAL) \ | |
5520 | 995 MX_ND_REDUCTION (retval(result_idx) ASN_EXPR, INIT_VAL, ComplexNDArray) |
4569 | 996 |
997 #define MX_ND_ANY_ALL_REDUCTION(EVAL_EXPR, VAL) \ | |
5520 | 998 MX_ND_REDUCTION (EVAL_EXPR, VAL, boolNDArray) |
4556 | 999 |
5523 | 1000 #define MX_ND_CUMULATIVE_OP(RET_TYPE, ACC_TYPE, INIT_VAL, OP) \ |
1001 \ | |
4584 | 1002 RET_TYPE retval; \ |
1003 \ | |
4932 | 1004 dim_vector dv = this->dims (); \ |
5523 | 1005 int nd = this->ndims (); \ |
4584 | 1006 \ |
5955 | 1007 bool empty = false; \ |
4584 | 1008 \ |
5523 | 1009 for (int i = 0; i < nd; i++) \ |
4584 | 1010 { \ |
5955 | 1011 if (dv(i) == 0) \ |
4584 | 1012 { \ |
5955 | 1013 empty = true; \ |
4584 | 1014 break; \ |
1015 } \ | |
1016 } \ | |
1017 \ | |
5523 | 1018 /* We need to find first non-singleton dim. */ \ |
1019 \ | |
4584 | 1020 if (dim == -1) \ |
1021 { \ | |
5523 | 1022 dim = 0; \ |
1023 \ | |
1024 for (int i = 0; i < nd; i++) \ | |
4584 | 1025 { \ |
5523 | 1026 if (dv(i) != 1) \ |
4584 | 1027 { \ |
1028 dim = i; \ | |
1029 break; \ | |
1030 } \ | |
1031 } \ | |
1032 } \ | |
5523 | 1033 else if (dim >= nd) \ |
4584 | 1034 { \ |
5523 | 1035 dim = nd++; \ |
1036 dv.resize (nd, 1); \ | |
4584 | 1037 } \ |
1038 \ | |
5523 | 1039 /* R = op (A, DIM) \ |
4584 | 1040 \ |
5523 | 1041 The strategy here is to access the elements of A along the \ |
1042 dimension specified by DIM. This means that we loop over each \ | |
5611 | 1043 element of R and adjust the index into A as needed. Store the \ |
5614 | 1044 cummulative product of all dimensions of A in CP_SZ. The last \ |
1045 element of CP_SZ is the total number of elements of A. */ \ | |
4584 | 1046 \ |
5611 | 1047 Array<octave_idx_type> cp_sz (nd+1, 1); \ |
1048 for (int i = 1; i <= nd; i++) \ | |
5523 | 1049 cp_sz(i) = cp_sz(i-1)*dv(i-1); \ |
4584 | 1050 \ |
5523 | 1051 octave_idx_type reset_at = cp_sz(dim); \ |
1052 octave_idx_type base_incr = cp_sz(dim+1); \ | |
1053 octave_idx_type incr = cp_sz(dim); \ | |
1054 octave_idx_type base = 0; \ | |
1055 octave_idx_type next_base = base + base_incr; \ | |
1056 octave_idx_type iter_idx = base; \ | |
1057 octave_idx_type n_elts = dv(dim); \ | |
4584 | 1058 \ |
5523 | 1059 retval.resize (dv, INIT_VAL); \ |
1060 \ | |
5955 | 1061 if (! empty) \ |
1062 { \ | |
1063 octave_idx_type nel = dv.numel () / n_elts; \ | |
5523 | 1064 \ |
5955 | 1065 octave_idx_type k = 1; \ |
4584 | 1066 \ |
5955 | 1067 for (octave_idx_type i = 0; i < nel; i++) \ |
1068 { \ | |
1069 OCTAVE_QUIT; \ | |
1070 \ | |
1071 ACC_TYPE prev_val = INIT_VAL; \ | |
1072 \ | |
1073 for (octave_idx_type j = 0; j < n_elts; j++) \ | |
1074 { \ | |
1075 OCTAVE_QUIT; \ | |
5523 | 1076 \ |
5955 | 1077 if (j == 0) \ |
1078 { \ | |
1079 retval(iter_idx) = elem (iter_idx); \ | |
1080 prev_val = retval(iter_idx); \ | |
1081 } \ | |
1082 else \ | |
1083 { \ | |
1084 prev_val = prev_val OP elem (iter_idx); \ | |
1085 retval(iter_idx) = prev_val; \ | |
1086 } \ | |
5523 | 1087 \ |
5955 | 1088 iter_idx += incr; \ |
1089 } \ | |
4584 | 1090 \ |
5955 | 1091 if (k == reset_at) \ |
5523 | 1092 { \ |
5955 | 1093 base = next_base; \ |
1094 next_base += base_incr; \ | |
1095 iter_idx = base; \ | |
1096 k = 1; \ | |
4584 | 1097 } \ |
1098 else \ | |
5523 | 1099 { \ |
5955 | 1100 base++; \ |
1101 iter_idx = base; \ | |
1102 k++; \ | |
1103 } \ | |
4584 | 1104 } \ |
5523 | 1105 } \ |
4584 | 1106 \ |
5523 | 1107 retval.chop_trailing_singletons (); \ |
1108 \ | |
4584 | 1109 return retval |
1110 | |
2804 | 1111 #endif |
3 | 1112 |
1113 /* | |
1114 ;;; Local Variables: *** | |
1115 ;;; mode: C++ *** | |
1116 ;;; End: *** | |
1117 */ |