Mercurial > octave-nkf
annotate liboctave/MArray.cc @ 9546:1beb23d2b892
optimize op= in common cases
author | Jaroslav Hajek <highegg@gmail.com> |
---|---|
date | Wed, 19 Aug 2009 13:47:59 +0200 |
parents | c2099a4d12ea |
children | 3a1dd361f978 |
rev | line source |
---|---|
237 | 1 /* |
2 | |
7017 | 3 Copyright (C) 1993, 1995, 1996, 1997, 2000, 2002, 2003, 2004, 2005, |
8920 | 4 2007, 2008 John W. Eaton |
8934
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
5 Copyright (C) 2009 VZLU Prague |
237 | 6 |
7 This file is part of Octave. | |
8 | |
9 Octave is free software; you can redistribute it and/or modify it | |
10 under the terms of the GNU General Public License as published by the | |
7016 | 11 Free Software Foundation; either version 3 of the License, or (at your |
12 option) any later version. | |
237 | 13 |
14 Octave is distributed in the hope that it will be useful, but WITHOUT | |
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
17 for more details. | |
18 | |
19 You should have received a copy of the GNU General Public License | |
7016 | 20 along with Octave; see the file COPYING. If not, see |
21 <http://www.gnu.org/licenses/>. | |
237 | 22 |
23 */ | |
24 | |
25 #ifdef HAVE_CONFIG_H | |
1192 | 26 #include <config.h> |
237 | 27 #endif |
28 | |
29 #include "MArray.h" | |
4669 | 30 #include "Array-util.h" |
237 | 31 #include "lo-error.h" |
32 | |
1989 | 33 #include "MArray-defs.h" |
1213 | 34 |
1360 | 35 // One dimensional array with math ops. |
237 | 36 |
6508 | 37 template <class T> |
38 double | |
39 MArray<T>::norm (double) const | |
40 { | |
41 (*current_liboctave_error_handler) | |
42 ("norm: only implemented for double and complex values"); | |
43 | |
44 return 0; | |
45 } | |
46 | |
7789
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
47 template <class T> |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
48 float |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
49 MArray<T>::norm (float) const |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
50 { |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
51 (*current_liboctave_error_handler) |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
52 ("norm: only implemented for double and complex values"); |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
53 |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
54 return 0; |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
55 } |
82be108cc558
First attempt at single precision tyeps
David Bateman <dbateman@free.fr>
parents:
7017
diff
changeset
|
56 |
8934
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
57 template <class T> |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
58 struct _idxadds_helper |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
59 { |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
60 T *array; |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
61 T val; |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
62 _idxadds_helper (T *a, T v) : array (a), val (v) { } |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
63 void operator () (octave_idx_type i) |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
64 { array[i] += val; } |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
65 }; |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
66 |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
67 template <class T> |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
68 struct _idxadda_helper |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
69 { |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
70 T *array; |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
71 const T *vals; |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
72 _idxadda_helper (T *a, const T *v) : array (a), vals (v) { } |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
73 void operator () (octave_idx_type i) |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
74 { array[i] += *vals++; } |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
75 }; |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
76 |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
77 template <class T> |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
78 void |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
79 MArray<T>::idx_add (const idx_vector& idx, T val) |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
80 { |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
81 octave_idx_type n = this->length (); |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
82 octave_idx_type ext = idx.extent (n); |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
83 if (ext > n) |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
84 { |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
85 this->resize (ext); |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
86 n = ext; |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
87 } |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
88 |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
89 OCTAVE_QUIT; |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
90 |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
91 octave_idx_type len = idx.length (n); |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
92 idx.loop (len, _idxadds_helper<T> (this->fortran_vec (), val)); |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
93 } |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
94 |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
95 template <class T> |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
96 void |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
97 MArray<T>::idx_add (const idx_vector& idx, const MArray<T>& vals) |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
98 { |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
99 octave_idx_type n = this->length (); |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
100 octave_idx_type ext = idx.extent (n); |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
101 if (ext > n) |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
102 { |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
103 this->resize (ext); |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
104 n = ext; |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
105 } |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
106 |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
107 OCTAVE_QUIT; |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
108 |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
109 octave_idx_type len = std::min (idx.length (n), vals.length ()); |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
110 idx.loop (len, _idxadda_helper<T> (this->fortran_vec (), vals.data ())); |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
111 } |
c2099a4d12ea
partially optimize accumarray
Jaroslav Hajek <highegg@gmail.com>
parents:
8920
diff
changeset
|
112 |
237 | 113 // Element by element MArray by scalar ops. |
114 | |
115 template <class T> | |
1213 | 116 MArray<T>& |
1230 | 117 operator += (MArray<T>& a, const T& s) |
237 | 118 { |
9546
1beb23d2b892
optimize op= in common cases
Jaroslav Hajek <highegg@gmail.com>
parents:
8934
diff
changeset
|
119 if (a.is_shared ()) |
1beb23d2b892
optimize op= in common cases
Jaroslav Hajek <highegg@gmail.com>
parents:
8934
diff
changeset
|
120 return a = a + s; |
4646 | 121 DO_VS_OP2 (T, a, +=, s) |
1230 | 122 return a; |
237 | 123 } |
124 | |
125 template <class T> | |
1213 | 126 MArray<T>& |
1230 | 127 operator -= (MArray<T>& a, const T& s) |
237 | 128 { |
9546
1beb23d2b892
optimize op= in common cases
Jaroslav Hajek <highegg@gmail.com>
parents:
8934
diff
changeset
|
129 if (a.is_shared ()) |
1beb23d2b892
optimize op= in common cases
Jaroslav Hajek <highegg@gmail.com>
parents:
8934
diff
changeset
|
130 return a = a - s; |
4646 | 131 DO_VS_OP2 (T, a, -=, s) |
1230 | 132 return a; |
237 | 133 } |
134 | |
9546
1beb23d2b892
optimize op= in common cases
Jaroslav Hajek <highegg@gmail.com>
parents:
8934
diff
changeset
|
135 template <class T> |
1beb23d2b892
optimize op= in common cases
Jaroslav Hajek <highegg@gmail.com>
parents:
8934
diff
changeset
|
136 MArray<T>& |
1beb23d2b892
optimize op= in common cases
Jaroslav Hajek <highegg@gmail.com>
parents:
8934
diff
changeset
|
137 operator *= (MArray<T>& a, const T& s) |
1beb23d2b892
optimize op= in common cases
Jaroslav Hajek <highegg@gmail.com>
parents:
8934
diff
changeset
|
138 { |
1beb23d2b892
optimize op= in common cases
Jaroslav Hajek <highegg@gmail.com>
parents:
8934
diff
changeset
|
139 if (a.is_shared ()) |
1beb23d2b892
optimize op= in common cases
Jaroslav Hajek <highegg@gmail.com>
parents:
8934
diff
changeset
|
140 return a = a * s; |
1beb23d2b892
optimize op= in common cases
Jaroslav Hajek <highegg@gmail.com>
parents:
8934
diff
changeset
|
141 DO_VS_OP2 (T, a, *=, s) |
1beb23d2b892
optimize op= in common cases
Jaroslav Hajek <highegg@gmail.com>
parents:
8934
diff
changeset
|
142 return a; |
1beb23d2b892
optimize op= in common cases
Jaroslav Hajek <highegg@gmail.com>
parents:
8934
diff
changeset
|
143 } |
1beb23d2b892
optimize op= in common cases
Jaroslav Hajek <highegg@gmail.com>
parents:
8934
diff
changeset
|
144 |
1beb23d2b892
optimize op= in common cases
Jaroslav Hajek <highegg@gmail.com>
parents:
8934
diff
changeset
|
145 template <class T> |
1beb23d2b892
optimize op= in common cases
Jaroslav Hajek <highegg@gmail.com>
parents:
8934
diff
changeset
|
146 MArray<T>& |
1beb23d2b892
optimize op= in common cases
Jaroslav Hajek <highegg@gmail.com>
parents:
8934
diff
changeset
|
147 operator /= (MArray<T>& a, const T& s) |
1beb23d2b892
optimize op= in common cases
Jaroslav Hajek <highegg@gmail.com>
parents:
8934
diff
changeset
|
148 { |
1beb23d2b892
optimize op= in common cases
Jaroslav Hajek <highegg@gmail.com>
parents:
8934
diff
changeset
|
149 if (a.is_shared ()) |
1beb23d2b892
optimize op= in common cases
Jaroslav Hajek <highegg@gmail.com>
parents:
8934
diff
changeset
|
150 return a = a / s; |
1beb23d2b892
optimize op= in common cases
Jaroslav Hajek <highegg@gmail.com>
parents:
8934
diff
changeset
|
151 DO_VS_OP2 (T, a, /=, s) |
1beb23d2b892
optimize op= in common cases
Jaroslav Hajek <highegg@gmail.com>
parents:
8934
diff
changeset
|
152 return a; |
1beb23d2b892
optimize op= in common cases
Jaroslav Hajek <highegg@gmail.com>
parents:
8934
diff
changeset
|
153 } |
1beb23d2b892
optimize op= in common cases
Jaroslav Hajek <highegg@gmail.com>
parents:
8934
diff
changeset
|
154 |
237 | 155 // Element by element MArray by MArray ops. |
156 | |
157 template <class T> | |
1213 | 158 MArray<T>& |
1230 | 159 operator += (MArray<T>& a, const MArray<T>& b) |
237 | 160 { |
9546
1beb23d2b892
optimize op= in common cases
Jaroslav Hajek <highegg@gmail.com>
parents:
8934
diff
changeset
|
161 if (a.is_shared ()) |
1beb23d2b892
optimize op= in common cases
Jaroslav Hajek <highegg@gmail.com>
parents:
8934
diff
changeset
|
162 return a = a + b; |
5275 | 163 octave_idx_type l = a.length (); |
1213 | 164 if (l > 0) |
237 | 165 { |
5275 | 166 octave_idx_type bl = b.length (); |
2383 | 167 if (l != bl) |
168 gripe_nonconformant ("operator +=", l, bl); | |
1213 | 169 else |
4646 | 170 DO_VV_OP2 (T, a, +=, b); |
237 | 171 } |
1230 | 172 return a; |
237 | 173 } |
174 | |
175 template <class T> | |
1213 | 176 MArray<T>& |
1230 | 177 operator -= (MArray<T>& a, const MArray<T>& b) |
237 | 178 { |
9546
1beb23d2b892
optimize op= in common cases
Jaroslav Hajek <highegg@gmail.com>
parents:
8934
diff
changeset
|
179 if (a.is_shared ()) |
1beb23d2b892
optimize op= in common cases
Jaroslav Hajek <highegg@gmail.com>
parents:
8934
diff
changeset
|
180 return a = a - b; |
5275 | 181 octave_idx_type l = a.length (); |
1213 | 182 if (l > 0) |
237 | 183 { |
5275 | 184 octave_idx_type bl = b.length (); |
2383 | 185 if (l != bl) |
186 gripe_nonconformant ("operator -=", l, bl); | |
1213 | 187 else |
4646 | 188 DO_VV_OP2 (T, a, -=, b); |
237 | 189 } |
1230 | 190 return a; |
237 | 191 } |
192 | |
1213 | 193 // Element by element MArray by scalar ops. |
194 | |
195 #define MARRAY_AS_OP(OP) \ | |
196 template <class T> \ | |
197 MArray<T> \ | |
198 operator OP (const MArray<T>& a, const T& s) \ | |
199 { \ | |
3504 | 200 MArray<T> result (a.length ()); \ |
201 T *r = result.fortran_vec (); \ | |
5275 | 202 octave_idx_type l = a.length (); \ |
3504 | 203 const T *v = a.data (); \ |
204 DO_VS_OP (r, l, v, OP, s); \ | |
205 return result; \ | |
1213 | 206 } |
237 | 207 |
1213 | 208 MARRAY_AS_OP (+) |
209 MARRAY_AS_OP (-) | |
210 MARRAY_AS_OP (*) | |
211 MARRAY_AS_OP (/) | |
212 | |
213 // Element by element scalar by MArray ops. | |
237 | 214 |
1213 | 215 #define MARRAY_SA_OP(OP) \ |
216 template <class T> \ | |
217 MArray<T> \ | |
218 operator OP (const T& s, const MArray<T>& a) \ | |
219 { \ | |
3504 | 220 MArray<T> result (a.length ()); \ |
221 T *r = result.fortran_vec (); \ | |
5275 | 222 octave_idx_type l = a.length (); \ |
3504 | 223 const T *v = a.data (); \ |
224 DO_SV_OP (r, l, s, OP, v); \ | |
225 return result; \ | |
226 } | |
237 | 227 |
1213 | 228 MARRAY_SA_OP(+) |
229 MARRAY_SA_OP(-) | |
230 MARRAY_SA_OP(*) | |
231 MARRAY_SA_OP(/) | |
232 | |
233 // Element by element MArray by MArray ops. | |
237 | 234 |
2383 | 235 #define MARRAY_AA_OP(FCN, OP) \ |
1213 | 236 template <class T> \ |
237 MArray<T> \ | |
238 FCN (const MArray<T>& a, const MArray<T>& b) \ | |
239 { \ | |
5275 | 240 octave_idx_type l = a.length (); \ |
241 octave_idx_type bl = b.length (); \ | |
2383 | 242 if (l != bl) \ |
1213 | 243 { \ |
2383 | 244 gripe_nonconformant (#FCN, l, bl); \ |
1213 | 245 return MArray<T> (); \ |
246 } \ | |
247 if (l == 0) \ | |
248 return MArray<T> (); \ | |
3504 | 249 MArray<T> result (l); \ |
250 T *r = result.fortran_vec (); \ | |
251 const T *x = a.data (); \ | |
252 const T *y = b.data (); \ | |
253 DO_VV_OP (r, l, x, OP, y); \ | |
254 return result; \ | |
1213 | 255 } |
237 | 256 |
2383 | 257 MARRAY_AA_OP (operator +, +) |
258 MARRAY_AA_OP (operator -, -) | |
259 MARRAY_AA_OP (product, *) | |
260 MARRAY_AA_OP (quotient, /) | |
237 | 261 |
262 // Unary MArray ops. | |
263 | |
264 template <class T> | |
265 MArray<T> | |
3574 | 266 operator + (const MArray<T>& a) |
267 { | |
268 return a; | |
269 } | |
270 | |
271 template <class T> | |
272 MArray<T> | |
237 | 273 operator - (const MArray<T>& a) |
274 { | |
5275 | 275 octave_idx_type l = a.length (); |
3504 | 276 MArray<T> result (l); |
277 T *r = result.fortran_vec (); | |
278 const T *x = a.data (); | |
279 NEG_V (r, l, x); | |
280 return result; | |
237 | 281 } |
282 | |
283 /* | |
284 ;;; Local Variables: *** | |
285 ;;; mode: C++ *** | |
286 ;;; End: *** | |
287 */ |