comparison liboctave/MArray.cc @ 10396:a0b51ac0f88a

optimize accumdim with summation
author Jaroslav Hajek <highegg@gmail.com>
date Fri, 05 Mar 2010 12:31:30 +0100
parents b47ab50a6aa8
children fd0a3ac60b0e
comparison
equal deleted inserted replaced
10395:aeb5b1e47978 10396:a0b51ac0f88a
131 131
132 octave_quit (); 132 octave_quit ();
133 133
134 octave_idx_type len = std::min (idx.length (n), vals.length ()); 134 octave_idx_type len = std::min (idx.length (n), vals.length ());
135 idx.loop (len, _idxbinop_helper<T, xmax> (this->fortran_vec (), vals.data ())); 135 idx.loop (len, _idxbinop_helper<T, xmax> (this->fortran_vec (), vals.data ()));
136 }
137
138 #include <iostream>
139
140 template <class T>
141 void MArray<T>::idx_add_nd (const idx_vector& idx, const MArray<T>& vals, int dim)
142 {
143 int nd = std::max (this->ndims (), vals.ndims ());
144 if (dim < 0)
145 dim = vals.dims ().first_non_singleton ();
146 else if (dim > nd)
147 nd = dim;
148
149 // Check dimensions.
150 dim_vector ddv = Array<T>::dims ().redim (nd);
151 dim_vector sdv = vals.dims ().redim (nd);
152
153 octave_idx_type ext = idx.extent (ddv (dim));
154
155 if (ext > ddv(dim))
156 {
157 ddv(dim) = ext;
158 Array<T>::resize (ddv);
159 ext = ddv(dim);
160 }
161
162 octave_idx_type l,n,u,ns;
163 get_extent_triplet (ddv, dim, l, n, u);
164 ns = sdv(dim);
165
166 sdv(dim) = ddv(dim) = 0;
167 if (ddv != sdv)
168 (*current_liboctave_error_handler)
169 ("accumdim: dimension mismatch");
170
171 T *dst = Array<T>::fortran_vec ();
172 const T *src = vals.data ();
173 octave_idx_type len = idx.length (ns);
174
175 if (l == 1)
176 {
177 for (octave_idx_type j = 0; j < u; j++)
178 {
179 octave_quit ();
180
181 idx.loop (len, _idxadda_helper<T> (dst + j*n, src + j*ns));
182 }
183 }
184 else
185 {
186 for (octave_idx_type j = 0; j < u; j++)
187 {
188 octave_quit ();
189 for (octave_idx_type i = 0; i < len; i++)
190 {
191 octave_idx_type k = idx(i);
192
193 mx_inline_add2 (l, dst + l*k, src + l*i);
194 }
195
196 dst += l*n;
197 src += l*ns;
198 }
199 }
136 } 200 }
137 201
138 // N-dimensional array with math ops. 202 // N-dimensional array with math ops.
139 template <class T> 203 template <class T>
140 void 204 void