Mercurial > octave
view liboctave/array/Array.cc @ 31198:863730dd0f83 stable
nextpow2: Fix for input between 0.5 and 1 (bug #62947).
* scripts/general/nextpow2.m: Switch to a naïve implementation using log2 with
a single output argument and ceil.
author | Markus Mützel <markus.muetzel@gmx.de> |
---|---|
date | Wed, 24 Aug 2022 17:15:34 +0200 |
parents | 796f54d4ddbf |
children | 4707df477065 |
line wrap: on
line source
//////////////////////////////////////////////////////////////////////// // // Copyright (C) 1993-2022 The Octave Project Developers // // See the file COPYRIGHT.md in the top-level directory of this // distribution or <https://octave.org/copyright/>. // // This file is part of Octave. // // Octave is free software: you can redistribute it and/or modify it // under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // Octave is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with Octave; see the file COPYING. If not, see // <https://www.gnu.org/licenses/>. // //////////////////////////////////////////////////////////////////////// // This file should not include config.h. It is only included in other // C++ source files that should have included config.h before including // this file. #include <ostream> #include "Array-util.h" #include "Array.h" #include "lo-mappers.h" #include "oct-locbuf.h" // One dimensional array class. Handles the reference counting for // all the derived classes. template <typename T, typename Alloc> typename Array<T, Alloc>::ArrayRep * Array<T, Alloc>::nil_rep (void) { static ArrayRep nr; return &nr; } // This is similar to the template for containers but specialized for Array. // Note we can't specialize a member without also specializing the class. template <typename T, typename Alloc> Array<T, Alloc>::Array (const Array<T, Alloc>& a, const dim_vector& dv) : m_dimensions (dv), m_rep (a.m_rep), m_slice_data (a.m_slice_data), m_slice_len (a.m_slice_len) { if (m_dimensions.safe_numel () != a.numel ()) { std::string dimensions_str = a.m_dimensions.str (); std::string new_dims_str = m_dimensions.str (); (*current_liboctave_error_handler) ("reshape: can't reshape %s array to %s array", dimensions_str.c_str (), new_dims_str.c_str ()); } // This goes here because if an exception is thrown by the above, // destructor will be never called. m_rep->m_count++; m_dimensions.chop_trailing_singletons (); } template <typename T, typename Alloc> void Array<T, Alloc>::fill (const T& val) { if (m_rep->m_count > 1) { --m_rep->m_count; m_rep = new ArrayRep (numel (), val); m_slice_data = m_rep->m_data; } else std::fill_n (m_slice_data, m_slice_len, val); } template <typename T, typename Alloc> void Array<T, Alloc>::clear (void) { if (--m_rep->m_count == 0) delete m_rep; m_rep = nil_rep (); m_rep->m_count++; m_slice_data = m_rep->m_data; m_slice_len = m_rep->m_len; m_dimensions = dim_vector (); } template <typename T, typename Alloc> void Array<T, Alloc>::clear (const dim_vector& dv) { if (--m_rep->m_count == 0) delete m_rep; m_rep = new ArrayRep (dv.safe_numel ()); m_slice_data = m_rep->m_data; m_slice_len = m_rep->m_len; m_dimensions = dv; m_dimensions.chop_trailing_singletons (); } template <typename T, typename Alloc> Array<T, Alloc> Array<T, Alloc>::squeeze (void) const { Array<T, Alloc> retval = *this; if (ndims () > 2) { bool dims_changed = false; dim_vector new_dimensions = m_dimensions; int k = 0; for (int i = 0; i < ndims (); i++) { if (m_dimensions(i) == 1) dims_changed = true; else new_dimensions(k++) = m_dimensions(i); } if (dims_changed) { switch (k) { case 0: new_dimensions = dim_vector (1, 1); break; case 1: { octave_idx_type tmp = new_dimensions(0); new_dimensions.resize (2); new_dimensions(0) = tmp; new_dimensions(1) = 1; } break; default: new_dimensions.resize (k); break; } } retval = Array<T, Alloc> (*this, new_dimensions); } return retval; } template <typename T, typename Alloc> octave_idx_type Array<T, Alloc>::compute_index (octave_idx_type i, octave_idx_type j) const { return ::compute_index (i, j, m_dimensions); } template <typename T, typename Alloc> octave_idx_type Array<T, Alloc>::compute_index (octave_idx_type i, octave_idx_type j, octave_idx_type k) const { return ::compute_index (i, j, k, m_dimensions); } template <typename T, typename Alloc> octave_idx_type Array<T, Alloc>::compute_index (const Array<octave_idx_type>& ra_idx) const { return ::compute_index (ra_idx, m_dimensions); } template <typename T, typename Alloc> T& Array<T, Alloc>::checkelem (octave_idx_type n) { // Do checks directly to avoid recomputing m_slice_len. if (n < 0) octave::err_invalid_index (n); if (n >= m_slice_len) octave::err_index_out_of_range (1, 1, n+1, m_slice_len, m_dimensions); return elem (n); } template <typename T, typename Alloc> T& Array<T, Alloc>::checkelem (octave_idx_type i, octave_idx_type j) { return elem (compute_index (i, j)); } template <typename T, typename Alloc> T& Array<T, Alloc>::checkelem (octave_idx_type i, octave_idx_type j, octave_idx_type k) { return elem (compute_index (i, j, k)); } template <typename T, typename Alloc> T& Array<T, Alloc>::checkelem (const Array<octave_idx_type>& ra_idx) { return elem (compute_index (ra_idx)); } template <typename T, typename Alloc> typename Array<T, Alloc>::crefT Array<T, Alloc>::checkelem (octave_idx_type n) const { // Do checks directly to avoid recomputing m_slice_len. if (n < 0) octave::err_invalid_index (n); if (n >= m_slice_len) octave::err_index_out_of_range (1, 1, n+1, m_slice_len, m_dimensions); return elem (n); } template <typename T, typename Alloc> typename Array<T, Alloc>::crefT Array<T, Alloc>::checkelem (octave_idx_type i, octave_idx_type j) const { return elem (compute_index (i, j)); } template <typename T, typename Alloc> typename Array<T, Alloc>::crefT Array<T, Alloc>::checkelem (octave_idx_type i, octave_idx_type j, octave_idx_type k) const { return elem (compute_index (i, j, k)); } template <typename T, typename Alloc> typename Array<T, Alloc>::crefT Array<T, Alloc>::checkelem (const Array<octave_idx_type>& ra_idx) const { return elem (compute_index (ra_idx)); } template <typename T, typename Alloc> Array<T, Alloc> Array<T, Alloc>::column (octave_idx_type k) const { octave_idx_type r = m_dimensions(0); return Array<T, Alloc> (*this, dim_vector (r, 1), k*r, k*r + r); } template <typename T, typename Alloc> Array<T, Alloc> Array<T, Alloc>::page (octave_idx_type k) const { octave_idx_type r = m_dimensions(0); octave_idx_type c = m_dimensions(1); octave_idx_type p = r*c; return Array<T, Alloc> (*this, dim_vector (r, c), k*p, k*p + p); } template <typename T, typename Alloc> Array<T, Alloc> Array<T, Alloc>::linear_slice (octave_idx_type lo, octave_idx_type up) const { if (up < lo) up = lo; return Array<T, Alloc> (*this, dim_vector (up - lo, 1), lo, up); } // Helper class for multi-d dimension permuting (generalized transpose). class rec_permute_helper { public: rec_permute_helper (const dim_vector& dv, const Array<octave_idx_type>& perm) : m_n (dv.ndims ()), m_top (0), m_dim (new octave_idx_type [2*m_n]), m_stride (m_dim + m_n), m_use_blk (false) { assert (m_n == perm.numel ()); // Get cumulative dimensions. OCTAVE_LOCAL_BUFFER (octave_idx_type, cdim, m_n+1); cdim[0] = 1; for (int i = 1; i < m_n+1; i++) cdim[i] = cdim[i-1] * dv(i-1); // Setup the permuted strides. for (int k = 0; k < m_n; k++) { int kk = perm(k); m_dim[k] = dv(kk); m_stride[k] = cdim[kk]; } // Reduce contiguous runs. for (int k = 1; k < m_n; k++) { if (m_stride[k] == m_stride[m_top]*m_dim[m_top]) m_dim[m_top] *= m_dim[k]; else { m_top++; m_dim[m_top] = m_dim[k]; m_stride[m_top] = m_stride[k]; } } // Determine whether we can use block transposes. m_use_blk = m_top >= 1 && m_stride[1] == 1 && m_stride[0] == m_dim[1]; } // No copying! rec_permute_helper (const rec_permute_helper&) = delete; rec_permute_helper& operator = (const rec_permute_helper&) = delete; ~rec_permute_helper (void) { delete [] m_dim; } template <typename T> void permute (const T *src, T *dest) const { do_permute (src, dest, m_top); } // Helper method for fast blocked transpose. template <typename T> static T * blk_trans (const T *src, T *dest, octave_idx_type nr, octave_idx_type nc) { static const octave_idx_type m = 8; OCTAVE_LOCAL_BUFFER (T, blk, m*m); for (octave_idx_type kr = 0; kr < nr; kr += m) for (octave_idx_type kc = 0; kc < nc; kc += m) { octave_idx_type lr = std::min (m, nr - kr); octave_idx_type lc = std::min (m, nc - kc); if (lr == m && lc == m) { const T *ss = src + kc * nr + kr; for (octave_idx_type j = 0; j < m; j++) for (octave_idx_type i = 0; i < m; i++) blk[j*m+i] = ss[j*nr + i]; T *dd = dest + kr * nc + kc; for (octave_idx_type j = 0; j < m; j++) for (octave_idx_type i = 0; i < m; i++) dd[j*nc+i] = blk[i*m+j]; } else { const T *ss = src + kc * nr + kr; for (octave_idx_type j = 0; j < lc; j++) for (octave_idx_type i = 0; i < lr; i++) blk[j*m+i] = ss[j*nr + i]; T *dd = dest + kr * nc + kc; for (octave_idx_type j = 0; j < lr; j++) for (octave_idx_type i = 0; i < lc; i++) dd[j*nc+i] = blk[i*m+j]; } } return dest + nr*nc; } private: // Recursive N-D generalized transpose template <typename T> T * do_permute (const T *src, T *dest, int lev) const { if (lev == 0) { octave_idx_type step = m_stride[0]; octave_idx_type len = m_dim[0]; if (step == 1) { std::copy_n (src, len, dest); dest += len; } else { for (octave_idx_type i = 0, j = 0; i < len; i++, j += step) dest[i] = src[j]; dest += len; } } else if (m_use_blk && lev == 1) dest = blk_trans (src, dest, m_dim[1], m_dim[0]); else { octave_idx_type step = m_stride[lev]; octave_idx_type len = m_dim[lev]; for (octave_idx_type i = 0, j = 0; i < len; i++, j+= step) dest = do_permute (src + i * step, dest, lev-1); } return dest; } //-------- // STRIDE occupies the last half of the space allocated for dim to // avoid a double allocation. int m_n; int m_top; octave_idx_type *m_dim; octave_idx_type *m_stride; bool m_use_blk; }; template <typename T, typename Alloc> Array<T, Alloc> Array<T, Alloc>::permute (const Array<octave_idx_type>& perm_vec_arg, bool inv) const { Array<T, Alloc> retval; Array<octave_idx_type> perm_vec = perm_vec_arg; dim_vector dv = dims (); int perm_vec_len = perm_vec_arg.numel (); if (perm_vec_len < dv.ndims ()) (*current_liboctave_error_handler) ("%s: invalid permutation vector", inv ? "ipermute" : "permute"); dim_vector dv_new = dim_vector::alloc (perm_vec_len); // Append singleton dimensions as needed. dv.resize (perm_vec_len, 1); // Need this array to check for identical elements in permutation array. OCTAVE_LOCAL_BUFFER_INIT (bool, checked, perm_vec_len, false); bool identity = true; // Find dimension vector of permuted array. for (int i = 0; i < perm_vec_len; i++) { octave_idx_type perm_elt = perm_vec.elem (i); if (perm_elt >= perm_vec_len || perm_elt < 0) (*current_liboctave_error_handler) ("%s: permutation vector contains an invalid element", inv ? "ipermute" : "permute"); if (checked[perm_elt]) (*current_liboctave_error_handler) ("%s: permutation vector cannot contain identical elements", inv ? "ipermute" : "permute"); else { checked[perm_elt] = true; identity = identity && perm_elt == i; } } if (identity) return *this; if (inv) { for (int i = 0; i < perm_vec_len; i++) perm_vec(perm_vec_arg(i)) = i; } for (int i = 0; i < perm_vec_len; i++) dv_new(i) = dv(perm_vec(i)); retval = Array<T, Alloc> (dv_new); if (numel () > 0) { rec_permute_helper rh (dv, perm_vec); rh.permute (data (), retval.fortran_vec ()); } return retval; } // Helper class for multi-d index reduction and recursive // indexing/indexed assignment. Rationale: we could avoid recursion // using a state machine instead. However, using recursion is much // more amenable to possible parallelization in the future. // Also, the recursion solution is cleaner and more understandable. class rec_index_helper { public: rec_index_helper (const dim_vector& dv, const Array<octave::idx_vector>& ia) : m_n (ia.numel ()), m_top (0), m_dim (new octave_idx_type [2*m_n]), m_cdim (m_dim + m_n), m_idx (new octave::idx_vector [m_n]) { assert (m_n > 0 && (dv.ndims () == std::max (m_n, 2))); m_dim[0] = dv(0); m_cdim[0] = 1; m_idx[0] = ia(0); for (int i = 1; i < m_n; i++) { // Try reduction... if (m_idx[m_top].maybe_reduce (m_dim[m_top], ia(i), dv(i))) { // Reduction successful, fold dimensions. m_dim[m_top] *= dv(i); } else { // Unsuccessful, store index & cumulative m_dim. m_top++; m_idx[m_top] = ia(i); m_dim[m_top] = dv(i); m_cdim[m_top] = m_cdim[m_top-1] * m_dim[m_top-1]; } } } // No copying! rec_index_helper (const rec_index_helper&) = delete; rec_index_helper& operator = (const rec_index_helper&) = delete; ~rec_index_helper (void) { delete [] m_idx; delete [] m_dim; } template <typename T> void index (const T *src, T *dest) const { do_index (src, dest, m_top); } template <typename T> void assign (const T *src, T *dest) const { do_assign (src, dest, m_top); } template <typename T> void fill (const T& val, T *dest) const { do_fill (val, dest, m_top); } bool is_cont_range (octave_idx_type& l, octave_idx_type& u) const { return m_top == 0 && m_idx[0].is_cont_range (m_dim[0], l, u); } private: // Recursive N-D indexing template <typename T> T * do_index (const T *src, T *dest, int lev) const { if (lev == 0) dest += m_idx[0].index (src, m_dim[0], dest); else { octave_idx_type nn = m_idx[lev].length (m_dim[lev]); octave_idx_type d = m_cdim[lev]; for (octave_idx_type i = 0; i < nn; i++) dest = do_index (src + d*m_idx[lev].xelem (i), dest, lev-1); } return dest; } // Recursive N-D indexed assignment template <typename T> const T * do_assign (const T *src, T *dest, int lev) const { if (lev == 0) src += m_idx[0].assign (src, m_dim[0], dest); else { octave_idx_type nn = m_idx[lev].length (m_dim[lev]); octave_idx_type d = m_cdim[lev]; for (octave_idx_type i = 0; i < nn; i++) src = do_assign (src, dest + d*m_idx[lev].xelem (i), lev-1); } return src; } // Recursive N-D indexed assignment template <typename T> void do_fill (const T& val, T *dest, int lev) const { if (lev == 0) m_idx[0].fill (val, m_dim[0], dest); else { octave_idx_type nn = m_idx[lev].length (m_dim[lev]); octave_idx_type d = m_cdim[lev]; for (octave_idx_type i = 0; i < nn; i++) do_fill (val, dest + d*m_idx[lev].xelem (i), lev-1); } } //-------- // CDIM occupies the last half of the space allocated for dim to // avoid a double allocation. int m_n; int m_top; octave_idx_type *m_dim; octave_idx_type *m_cdim; octave::idx_vector *m_idx; }; // Helper class for multi-d recursive resizing // This handles resize () in an efficient manner, touching memory only // once (apart from reinitialization) class rec_resize_helper { public: rec_resize_helper (const dim_vector& ndv, const dim_vector& odv) : m_cext (nullptr), m_sext (nullptr), m_dext (nullptr), m_n (0) { int l = ndv.ndims (); assert (odv.ndims () == l); octave_idx_type ld = 1; int i = 0; for (; i < l-1 && ndv(i) == odv(i); i++) ld *= ndv(i); m_n = l - i; m_cext = new octave_idx_type [3*m_n]; // Trick to avoid three allocations m_sext = m_cext + m_n; m_dext = m_sext + m_n; octave_idx_type sld = ld; octave_idx_type dld = ld; for (int j = 0; j < m_n; j++) { m_cext[j] = std::min (ndv(i+j), odv(i+j)); m_sext[j] = sld *= odv(i+j); m_dext[j] = dld *= ndv(i+j); } m_cext[0] *= ld; } // No copying! rec_resize_helper (const rec_resize_helper&) = delete; rec_resize_helper& operator = (const rec_resize_helper&) = delete; ~rec_resize_helper (void) { delete [] m_cext; } template <typename T> void resize_fill (const T *src, T *dest, const T& rfv) const { do_resize_fill (src, dest, rfv, m_n-1); } private: // recursive resizing template <typename T> void do_resize_fill (const T *src, T *dest, const T& rfv, int lev) const { if (lev == 0) { std::copy_n (src, m_cext[0], dest); std::fill_n (dest + m_cext[0], m_dext[0] - m_cext[0], rfv); } else { octave_idx_type sd, dd, k; sd = m_sext[lev-1]; dd = m_dext[lev-1]; for (k = 0; k < m_cext[lev]; k++) do_resize_fill (src + k * sd, dest + k * dd, rfv, lev - 1); std::fill_n (dest + k * dd, m_dext[lev] - k * dd, rfv); } } //-------- octave_idx_type *m_cext; octave_idx_type *m_sext; octave_idx_type *m_dext; int m_n; }; template <typename T, typename Alloc> Array<T, Alloc> Array<T, Alloc>::index (const octave::idx_vector& i) const { // Colon: // // object | index | result orientation // ---------+----------+------------------- // anything | colon | column vector // // Numeric array or logical mask: // // Note that logical mask indices are always transformed to vectors // before we see them. // // object | index | result orientation // ---------+----------+------------------- // vector | vector | indexed object // | other | same size as index // ---------+----------+------------------- // array | anything | same size as index octave_idx_type n = numel (); Array<T, Alloc> retval; if (i.is_colon ()) { // A(:) produces a shallow copy as a column vector. retval = Array<T, Alloc> (*this, dim_vector (n, 1)); } else { if (i.extent (n) != n) octave::err_index_out_of_range (1, 1, i.extent (n), n, m_dimensions); dim_vector result_dims = i.orig_dimensions (); octave_idx_type idx_len = i.length (); if (n != 1 && is_nd_vector () && idx_len != 1 && result_dims.is_nd_vector ()) { // Indexed object and index are both vectors. Set result size // and orientation as above. dim_vector dv = dims (); result_dims = dv.make_nd_vector (idx_len); } octave_idx_type l, u; if (idx_len != 0 && i.is_cont_range (n, l, u)) // If suitable, produce a shallow slice. retval = Array<T, Alloc> (*this, result_dims, l, u); else { // Don't use resize here to avoid useless initialization for POD // types. retval = Array<T, Alloc> (result_dims); if (idx_len != 0) i.index (data (), n, retval.fortran_vec ()); } } return retval; } template <typename T, typename Alloc> Array<T, Alloc> Array<T, Alloc>::index (const octave::idx_vector& i, const octave::idx_vector& j) const { // Get dimensions, allowing Fortran indexing in the 2nd dim. dim_vector dv = m_dimensions.redim (2); octave_idx_type r = dv(0); octave_idx_type c = dv(1); Array<T, Alloc> retval; if (i.is_colon () && j.is_colon ()) { // A(:,:) produces a shallow copy. retval = Array<T, Alloc> (*this, dv); } else { if (i.extent (r) != r) octave::err_index_out_of_range (2, 1, i.extent (r), r, m_dimensions); // throws if (j.extent (c) != c) octave::err_index_out_of_range (2, 2, j.extent (c), c, m_dimensions); // throws octave_idx_type n = numel (); octave_idx_type il = i.length (r); octave_idx_type jl = j.length (c); octave::idx_vector ii (i); if (ii.maybe_reduce (r, j, c)) { octave_idx_type l, u; if (ii.length () > 0 && ii.is_cont_range (n, l, u)) // If suitable, produce a shallow slice. retval = Array<T, Alloc> (*this, dim_vector (il, jl), l, u); else { // Don't use resize to avoid useless initialization for POD types. retval = Array<T, Alloc> (dim_vector (il, jl)); ii.index (data (), n, retval.fortran_vec ()); } } else { // Don't use resize to avoid useless initialization for POD types. retval = Array<T, Alloc> (dim_vector (il, jl)); const T *src = data (); T *dest = retval.fortran_vec (); for (octave_idx_type k = 0; k < jl; k++) dest += i.index (src + r * j.xelem (k), r, dest); } } return retval; } template <typename T, typename Alloc> Array<T, Alloc> Array<T, Alloc>::index (const Array<octave::idx_vector>& ia) const { int ial = ia.numel (); Array<T, Alloc> retval; // FIXME: is this dispatching necessary? if (ial == 1) retval = index (ia(0)); else if (ial == 2) retval = index (ia(0), ia(1)); else if (ial > 0) { // Get dimensions, allowing Fortran indexing in the last dim. dim_vector dv = m_dimensions.redim (ial); // Check for out of bounds conditions. bool all_colons = true; for (int i = 0; i < ial; i++) { if (ia(i).extent (dv(i)) != dv(i)) octave::err_index_out_of_range (ial, i+1, ia(i).extent (dv(i)), dv(i), m_dimensions); // throws all_colons = all_colons && ia(i).is_colon (); } if (all_colons) { // A(:,:,...,:) produces a shallow copy. dv.chop_trailing_singletons (); retval = Array<T, Alloc> (*this, dv); } else { // Form result dimensions. dim_vector rdv = dim_vector::alloc (ial); for (int i = 0; i < ial; i++) rdv(i) = ia(i).length (dv(i)); rdv.chop_trailing_singletons (); // Prepare for recursive indexing rec_index_helper rh (dv, ia); octave_idx_type l, u; if (rh.is_cont_range (l, u)) // If suitable, produce a shallow slice. retval = Array<T, Alloc> (*this, rdv, l, u); else { // Don't use resize to avoid useless initialization for POD types. retval = Array<T, Alloc> (rdv); // Do it. rh.index (data (), retval.fortran_vec ()); } } } return retval; } // The default fill value. Override if you want a different one. template <typename T, typename Alloc> T Array<T, Alloc>::resize_fill_value (void) const { static T zero = T (); return zero; } // Yes, we could do resize using index & assign. However, that would // possibly involve a lot more memory traffic than we actually need. template <typename T, typename Alloc> void Array<T, Alloc>::resize1 (octave_idx_type n, const T& rfv) { if (n < 0 || ndims () != 2) octave::err_invalid_resize (); dim_vector dv; // This is driven by Matlab's behavior of giving a *row* vector // on some out-of-bounds assignments. Specifically, Matlab // allows a(i) with out-of-bouds i when a is either of 0x0, 1x0, // 1x1, 0xN, and gives a row vector in all cases (yes, even the // last one, search me why). Giving a column vector would make // much more sense (given the way trailing singleton dims are // treated). bool invalid = false; if (rows () == 0 || rows () == 1) dv = dim_vector (1, n); else if (columns () == 1) dv = dim_vector (n, 1); else invalid = true; if (invalid) octave::err_invalid_resize (); octave_idx_type nx = numel (); if (n == nx - 1 && n > 0) { // Stack "pop" operation. if (m_rep->m_count == 1) m_slice_data[m_slice_len-1] = T (); m_slice_len--; m_dimensions = dv; } else if (n == nx + 1 && nx > 0) { // Stack "push" operation. if (m_rep->m_count == 1 && m_slice_data + m_slice_len < m_rep->m_data + m_rep->m_len) { m_slice_data[m_slice_len++] = rfv; m_dimensions = dv; } else { static const octave_idx_type max_stack_chunk = 1024; octave_idx_type nn = n + std::min (nx, max_stack_chunk); Array<T, Alloc> tmp (Array<T, Alloc> (dim_vector (nn, 1)), dv, 0, n); T *dest = tmp.fortran_vec (); std::copy_n (data (), nx, dest); dest[nx] = rfv; *this = tmp; } } else if (n != nx) { Array<T, Alloc> tmp = Array<T, Alloc> (dv); T *dest = tmp.fortran_vec (); octave_idx_type n0 = std::min (n, nx); octave_idx_type n1 = n - n0; std::copy_n (data (), n0, dest); std::fill_n (dest + n0, n1, rfv); *this = tmp; } } template <typename T, typename Alloc> void Array<T, Alloc>::resize2 (octave_idx_type r, octave_idx_type c, const T& rfv) { if (r < 0 || c < 0 || ndims () != 2) octave::err_invalid_resize (); octave_idx_type rx = rows (); octave_idx_type cx = columns (); if (r != rx || c != cx) { Array<T, Alloc> tmp = Array<T, Alloc> (dim_vector (r, c)); T *dest = tmp.fortran_vec (); octave_idx_type r0 = std::min (r, rx); octave_idx_type r1 = r - r0; octave_idx_type c0 = std::min (c, cx); octave_idx_type c1 = c - c0; const T *src = data (); if (r == rx) { std::copy_n (src, r * c0, dest); dest += r * c0; } else { for (octave_idx_type k = 0; k < c0; k++) { std::copy_n (src, r0, dest); src += rx; dest += r0; std::fill_n (dest, r1, rfv); dest += r1; } } std::fill_n (dest, r * c1, rfv); *this = tmp; } } template <typename T, typename Alloc> void Array<T, Alloc>::resize (const dim_vector& dv, const T& rfv) { int dvl = dv.ndims (); if (dvl == 2) resize2 (dv(0), dv(1), rfv); else if (m_dimensions != dv) { if (m_dimensions.ndims () > dvl || dv.any_neg ()) octave::err_invalid_resize (); Array<T, Alloc> tmp (dv); // Prepare for recursive resizing. rec_resize_helper rh (dv, m_dimensions.redim (dvl)); // Do it. rh.resize_fill (data (), tmp.fortran_vec (), rfv); *this = tmp; } } template <typename T, typename Alloc> Array<T, Alloc> Array<T, Alloc>::index (const octave::idx_vector& i, bool resize_ok, const T& rfv) const { Array<T, Alloc> tmp = *this; if (resize_ok) { octave_idx_type n = numel (); octave_idx_type nx = i.extent (n); if (n != nx) { if (i.is_scalar ()) return Array<T, Alloc> (dim_vector (1, 1), rfv); else tmp.resize1 (nx, rfv); } if (tmp.numel () != nx) return Array<T, Alloc> (); } return tmp.index (i); } template <typename T, typename Alloc> Array<T, Alloc> Array<T, Alloc>::index (const octave::idx_vector& i, const octave::idx_vector& j, bool resize_ok, const T& rfv) const { Array<T, Alloc> tmp = *this; if (resize_ok) { dim_vector dv = m_dimensions.redim (2); octave_idx_type r = dv(0); octave_idx_type c = dv(1); octave_idx_type rx = i.extent (r); octave_idx_type cx = j.extent (c); if (r != rx || c != cx) { if (i.is_scalar () && j.is_scalar ()) return Array<T, Alloc> (dim_vector (1, 1), rfv); else tmp.resize2 (rx, cx, rfv); } if (tmp.rows () != rx || tmp.columns () != cx) return Array<T, Alloc> (); } return tmp.index (i, j); } template <typename T, typename Alloc> Array<T, Alloc> Array<T, Alloc>::index (const Array<octave::idx_vector>& ia, bool resize_ok, const T& rfv) const { Array<T, Alloc> tmp = *this; if (resize_ok) { int ial = ia.numel (); dim_vector dv = m_dimensions.redim (ial); dim_vector dvx = dim_vector::alloc (ial); for (int i = 0; i < ial; i++) dvx(i) = ia(i).extent (dv(i)); if (! (dvx == dv)) { bool all_scalars = true; for (int i = 0; i < ial; i++) all_scalars = all_scalars && ia(i).is_scalar (); if (all_scalars) return Array<T, Alloc> (dim_vector (1, 1), rfv); else tmp.resize (dvx, rfv); if (tmp.m_dimensions != dvx) return Array<T, Alloc> (); } } return tmp.index (ia); } template <typename T, typename Alloc> void Array<T, Alloc>::assign (const octave::idx_vector& i, const Array<T, Alloc>& rhs, const T& rfv) { octave_idx_type n = numel (); octave_idx_type rhl = rhs.numel (); if (rhl != 1 && i.length (n) != rhl) octave::err_nonconformant ("=", dim_vector(i.length(n), 1), rhs.dims()); octave_idx_type nx = i.extent (n); bool colon = i.is_colon_equiv (nx); // Try to resize first if necessary. if (nx != n) { // Optimize case A = []; A(1:n) = X with A empty. if (m_dimensions.zero_by_zero () && colon) { if (rhl == 1) *this = Array<T, Alloc> (dim_vector (1, nx), rhs(0)); else *this = Array<T, Alloc> (rhs, dim_vector (1, nx)); return; } resize1 (nx, rfv); n = numel (); } if (colon) { // A(:) = X makes a full fill or a shallow copy. if (rhl == 1) fill (rhs(0)); else *this = rhs.reshape (m_dimensions); } else { if (rhl == 1) i.fill (rhs(0), n, fortran_vec ()); else i.assign (rhs.data (), n, fortran_vec ()); } } // Assignment to a 2-dimensional array template <typename T, typename Alloc> void Array<T, Alloc>::assign (const octave::idx_vector& i, const octave::idx_vector& j, const Array<T, Alloc>& rhs, const T& rfv) { bool initial_dims_all_zero = m_dimensions.all_zero (); // Get RHS extents, discarding singletons. dim_vector rhdv = rhs.dims (); // Get LHS extents, allowing Fortran indexing in the second dim. dim_vector dv = m_dimensions.redim (2); // Check for out-of-bounds and form resizing m_dimensions. dim_vector rdv; // In the special when all dimensions are zero, colons are allowed // to inquire the shape of RHS. The rules are more obscure, so we // solve that elsewhere. if (initial_dims_all_zero) rdv = zero_dims_inquire (i, j, rhdv); else { rdv(0) = i.extent (dv(0)); rdv(1) = j.extent (dv(1)); } bool isfill = rhs.numel () == 1; octave_idx_type il = i.length (rdv(0)); octave_idx_type jl = j.length (rdv(1)); rhdv.chop_all_singletons (); bool match = (isfill || (rhdv.ndims () == 2 && il == rhdv(0) && jl == rhdv(1))); match = match || (il == 1 && jl == rhdv(0) && rhdv(1) == 1); if (match) { bool all_colons = (i.is_colon_equiv (rdv(0)) && j.is_colon_equiv (rdv(1))); // Resize if requested. if (rdv != dv) { // Optimize case A = []; A(1:m, 1:n) = X if (dv.zero_by_zero () && all_colons) { if (isfill) *this = Array<T, Alloc> (rdv, rhs(0)); else *this = Array<T, Alloc> (rhs, rdv); return; } resize (rdv, rfv); dv = m_dimensions; } if (all_colons) { // A(:,:) = X makes a full fill or a shallow copy if (isfill) fill (rhs(0)); else *this = rhs.reshape (m_dimensions); } else { // The actual work. octave_idx_type n = numel (); octave_idx_type r = dv(0); octave_idx_type c = dv(1); octave::idx_vector ii (i); const T *src = rhs.data (); T *dest = fortran_vec (); // Try reduction first. if (ii.maybe_reduce (r, j, c)) { if (isfill) ii.fill (*src, n, dest); else ii.assign (src, n, dest); } else { if (isfill) { for (octave_idx_type k = 0; k < jl; k++) i.fill (*src, r, dest + r * j.xelem (k)); } else { for (octave_idx_type k = 0; k < jl; k++) src += i.assign (src, r, dest + r * j.xelem (k)); } } } } // any empty RHS can be assigned to an empty LHS else if ((il != 0 && jl != 0) || (rhdv(0) != 0 && rhdv(1) != 0)) octave::err_nonconformant ("=", il, jl, rhs.dim1 (), rhs.dim2 ()); } // Assignment to a multi-dimensional array template <typename T, typename Alloc> void Array<T, Alloc>::assign (const Array<octave::idx_vector>& ia, const Array<T, Alloc>& rhs, const T& rfv) { int ial = ia.numel (); // FIXME: is this dispatching necessary / desirable? if (ial == 1) assign (ia(0), rhs, rfv); else if (ial == 2) assign (ia(0), ia(1), rhs, rfv); else if (ial > 0) { bool initial_dims_all_zero = m_dimensions.all_zero (); // Get RHS extents, discarding singletons. dim_vector rhdv = rhs.dims (); // Get LHS extents, allowing Fortran indexing in the second dim. dim_vector dv = m_dimensions.redim (ial); // Get the extents forced by indexing. dim_vector rdv; // In the special when all dimensions are zero, colons are // allowed to inquire the shape of RHS. The rules are more // obscure, so we solve that elsewhere. if (initial_dims_all_zero) rdv = zero_dims_inquire (ia, rhdv); else { rdv = dim_vector::alloc (ial); for (int i = 0; i < ial; i++) rdv(i) = ia(i).extent (dv(i)); } // Check whether LHS and RHS match, up to singleton dims. bool match = true; bool all_colons = true; bool isfill = rhs.numel () == 1; rhdv.chop_all_singletons (); int j = 0; int rhdvl = rhdv.ndims (); for (int i = 0; i < ial; i++) { all_colons = all_colons && ia(i).is_colon_equiv (rdv(i)); octave_idx_type l = ia(i).length (rdv(i)); if (l == 1) continue; match = match && j < rhdvl && l == rhdv(j++); } match = match && (j == rhdvl || rhdv(j) == 1); match = match || isfill; if (match) { // Resize first if necessary. if (rdv != dv) { // Optimize case A = []; A(1:m, 1:n) = X if (dv.zero_by_zero () && all_colons) { rdv.chop_trailing_singletons (); if (isfill) *this = Array<T, Alloc> (rdv, rhs(0)); else *this = Array<T, Alloc> (rhs, rdv); return; } resize (rdv, rfv); dv = rdv; } if (all_colons) { // A(:,:,...,:) = X makes a full fill or a shallow copy. if (isfill) fill (rhs(0)); else *this = rhs.reshape (m_dimensions); } else { // Do the actual work. // Prepare for recursive indexing rec_index_helper rh (dv, ia); // Do it. if (isfill) rh.fill (rhs(0), fortran_vec ()); else rh.assign (rhs.data (), fortran_vec ()); } } else { // dimension mismatch, unless LHS and RHS both empty bool lhsempty, rhsempty; lhsempty = rhsempty = false; dim_vector lhs_dv = dim_vector::alloc (ial); for (int i = 0; i < ial; i++) { octave_idx_type l = ia(i).length (rdv(i)); lhs_dv(i) = l; lhsempty = lhsempty || (l == 0); rhsempty = rhsempty || (rhdv(j++) == 0); } if (! lhsempty || ! rhsempty) { lhs_dv.chop_trailing_singletons (); octave::err_nonconformant ("=", lhs_dv, rhdv); } } } } /* %!shared a %! a = [1 2; 3 4]; %!error <op1 is 1x2, op2 is 1x3> a(1,:) = [1 2 3] %!error <op1 is 2x1, op2 is 1x3> a(:,1) = [1 2 3] %!error <op1 is 2x2, op2 is 2x1> a(1:2,2:3) = [1;2] */ template <typename T, typename Alloc> void Array<T, Alloc>::delete_elements (const octave::idx_vector& i) { octave_idx_type n = numel (); if (i.is_colon ()) { *this = Array<T, Alloc> (); } else if (i.length (n) != 0) { if (i.extent (n) != n) octave::err_del_index_out_of_range (true, i.extent (n), n); octave_idx_type l, u; bool col_vec = ndims () == 2 && columns () == 1 && rows () != 1; if (i.is_scalar () && i(0) == n-1 && m_dimensions.isvector ()) { // Stack "pop" operation. resize1 (n-1); } else if (i.is_cont_range (n, l, u)) { // Special case deleting a contiguous range. octave_idx_type m = n + l - u; Array<T, Alloc> tmp (dim_vector (col_vec ? m : 1, ! col_vec ? m : 1)); const T *src = data (); T *dest = tmp.fortran_vec (); std::copy_n (src, l, dest); std::copy (src + u, src + n, dest + l); *this = tmp; } else { // Use index. *this = index (i.complement (n)); } } } template <typename T, typename Alloc> void Array<T, Alloc>::delete_elements (int dim, const octave::idx_vector& i) { if (dim < 0 || dim >= ndims ()) (*current_liboctave_error_handler) ("invalid dimension in delete_elements"); octave_idx_type n = m_dimensions(dim); if (i.is_colon ()) { *this = Array<T, Alloc> (); } else if (i.length (n) != 0) { if (i.extent (n) != n) octave::err_del_index_out_of_range (false, i.extent (n), n); octave_idx_type l, u; if (i.is_cont_range (n, l, u)) { // Special case deleting a contiguous range. octave_idx_type nd = n + l - u; octave_idx_type dl = 1; octave_idx_type du = 1; dim_vector rdv = m_dimensions; rdv(dim) = nd; for (int k = 0; k < dim; k++) dl *= m_dimensions(k); for (int k = dim + 1; k < ndims (); k++) du *= m_dimensions(k); // Special case deleting a contiguous range. Array<T, Alloc> tmp = Array<T, Alloc> (rdv); const T *src = data (); T *dest = tmp.fortran_vec (); l *= dl; u *= dl; n *= dl; for (octave_idx_type k = 0; k < du; k++) { std::copy_n (src, l, dest); dest += l; std::copy (src + u, src + n, dest); dest += n - u; src += n; } *this = tmp; } else { // Use index. Array<octave::idx_vector> ia (dim_vector (ndims (), 1), octave::idx_vector::colon); ia (dim) = i.complement (n); *this = index (ia); } } } template <typename T, typename Alloc> void Array<T, Alloc>::delete_elements (const Array<octave::idx_vector>& ia) { int ial = ia.numel (); if (ial == 1) delete_elements (ia(0)); else { int k, dim = -1; for (k = 0; k < ial; k++) { if (! ia(k).is_colon ()) { if (dim < 0) dim = k; else break; } } if (dim < 0) { dim_vector dv = m_dimensions; dv(0) = 0; *this = Array<T, Alloc> (dv); } else if (k == ial) { delete_elements (dim, ia(dim)); } else { // Allow the null assignment to succeed if it won't change // anything because the indices reference an empty slice, // provided that there is at most one non-colon (or // equivalent) index. So, we still have the requirement of // deleting a slice, but it is OK if the slice is empty. // For compatibility with Matlab, stop checking once we see // more than one non-colon index or an empty index. Matlab // considers "[]" to be an empty index but not "false". We // accept both. bool empty_assignment = false; int num_non_colon_indices = 0; int nd = ndims (); for (int i = 0; i < ial; i++) { octave_idx_type dim_len = (i >= nd ? 1 : m_dimensions(i)); if (ia(i).length (dim_len) == 0) { empty_assignment = true; break; } if (! ia(i).is_colon_equiv (dim_len)) { num_non_colon_indices++; if (num_non_colon_indices == 2) break; } } if (! empty_assignment) (*current_liboctave_error_handler) ("a null assignment can only have one non-colon index"); } } } template <typename T, typename Alloc> Array<T, Alloc>& Array<T, Alloc>::insert (const Array<T, Alloc>& a, octave_idx_type r, octave_idx_type c) { octave::idx_vector i (r, r + a.rows ()); octave::idx_vector j (c, c + a.columns ()); if (ndims () == 2 && a.ndims () == 2) assign (i, j, a); else { Array<octave::idx_vector> idx (dim_vector (a.ndims (), 1)); idx(0) = i; idx(1) = j; for (int k = 2; k < a.ndims (); k++) idx(k) = octave::idx_vector (0, a.m_dimensions(k)); assign (idx, a); } return *this; } template <typename T, typename Alloc> Array<T, Alloc>& Array<T, Alloc>::insert (const Array<T, Alloc>& a, const Array<octave_idx_type>& ra_idx) { octave_idx_type n = ra_idx.numel (); Array<octave::idx_vector> idx (dim_vector (n, 1)); const dim_vector dva = a.dims ().redim (n); for (octave_idx_type k = 0; k < n; k++) idx(k) = octave::idx_vector (ra_idx(k), ra_idx(k) + dva(k)); assign (idx, a); return *this; } template <typename T, typename Alloc> Array<T, Alloc> Array<T, Alloc>::transpose (void) const { assert (ndims () == 2); octave_idx_type nr = dim1 (); octave_idx_type nc = dim2 (); if (nr >= 8 && nc >= 8) { Array<T, Alloc> result (dim_vector (nc, nr)); // Reuse the implementation used for permuting. rec_permute_helper::blk_trans (data (), result.fortran_vec (), nr, nc); return result; } else if (nr > 1 && nc > 1) { Array<T, Alloc> result (dim_vector (nc, nr)); for (octave_idx_type j = 0; j < nc; j++) for (octave_idx_type i = 0; i < nr; i++) result.xelem (j, i) = xelem (i, j); return result; } else { // Fast transpose for vectors and empty matrices. return Array<T, Alloc> (*this, dim_vector (nc, nr)); } } template <typename T> static T no_op_fcn (const T& x) { return x; } template <typename T, typename Alloc> Array<T, Alloc> Array<T, Alloc>::hermitian (T (*fcn) (const T&)) const { assert (ndims () == 2); if (! fcn) fcn = no_op_fcn<T>; octave_idx_type nr = dim1 (); octave_idx_type nc = dim2 (); if (nr >= 8 && nc >= 8) { Array<T, Alloc> result (dim_vector (nc, nr)); // Blocked transpose to attempt to avoid cache misses. T buf[64]; octave_idx_type jj; for (jj = 0; jj < (nc - 8 + 1); jj += 8) { octave_idx_type ii; for (ii = 0; ii < (nr - 8 + 1); ii += 8) { // Copy to buffer for (octave_idx_type j = jj, k = 0, idxj = jj * nr; j < jj + 8; j++, idxj += nr) for (octave_idx_type i = ii; i < ii + 8; i++) buf[k++] = xelem (i + idxj); // Copy from buffer for (octave_idx_type i = ii, idxi = ii * nc; i < ii + 8; i++, idxi += nc) for (octave_idx_type j = jj, k = i - ii; j < jj + 8; j++, k+=8) result.xelem (j + idxi) = fcn (buf[k]); } if (ii < nr) for (octave_idx_type j = jj; j < jj + 8; j++) for (octave_idx_type i = ii; i < nr; i++) result.xelem (j, i) = fcn (xelem (i, j)); } for (octave_idx_type j = jj; j < nc; j++) for (octave_idx_type i = 0; i < nr; i++) result.xelem (j, i) = fcn (xelem (i, j)); return result; } else { Array<T, Alloc> result (dim_vector (nc, nr)); for (octave_idx_type j = 0; j < nc; j++) for (octave_idx_type i = 0; i < nr; i++) result.xelem (j, i) = fcn (xelem (i, j)); return result; } } /* ## Transpose tests for matrices of the tile size and plus or minus a row ## and with four tiles. %!shared m7, mt7, m8, mt8, m9, mt9 %! m7 = reshape (1 : 7*8, 8, 7); %! mt7 = [1:8; 9:16; 17:24; 25:32; 33:40; 41:48; 49:56]; %! m8 = reshape (1 : 8*8, 8, 8); %! mt8 = mt8 = [mt7; 57:64]; %! m9 = reshape (1 : 9*8, 8, 9); %! mt9 = [mt8; 65:72]; %!assert (m7', mt7) %!assert ((1i*m7).', 1i * mt7) %!assert ((1i*m7)', conj (1i * mt7)) %!assert (m8', mt8) %!assert ((1i*m8).', 1i * mt8) %!assert ((1i*m8)', conj (1i * mt8)) %!assert (m9', mt9) %!assert ((1i*m9).', 1i * mt9) %!assert ((1i*m9)', conj (1i * mt9)) %!assert ([m7, m8; m7, m8]', [mt7, mt7; mt8, mt8]) %!assert ((1i*[m7, m8; m7, m8]).', 1i * [mt7, mt7; mt8, mt8]) %!assert ((1i*[m7, m8; m7, m8])', conj (1i * [mt7, mt7; mt8, mt8])) %!assert ([m8, m8; m8, m8]', [mt8, mt8; mt8, mt8]) %!assert ((1i*[m8, m8; m8, m8]).', 1i * [mt8, mt8; mt8, mt8]) %!assert ((1i*[m8, m8; m8, m8])', conj (1i * [mt8, mt8; mt8, mt8])) %!assert ([m9, m8; m9, m8]', [mt9, mt9; mt8, mt8]) %!assert ((1i*[m9, m8; m9, m8]).', 1i * [mt9, mt9; mt8, mt8]) %!assert ((1i*[m9, m8; m9, m8])', conj (1i * [mt9, mt9; mt8, mt8])) */ template <typename T, typename Alloc> T * Array<T, Alloc>::fortran_vec (void) { make_unique (); return m_slice_data; } // Non-real types don't have NaNs. template <typename T> inline bool sort_isnan (typename ref_param<T>::type) { return false; } template <typename T, typename Alloc> Array<T, Alloc> Array<T, Alloc>::sort (int dim, sortmode mode) const { if (dim < 0) (*current_liboctave_error_handler) ("sort: invalid dimension"); Array<T, Alloc> m (dims ()); dim_vector dv = m.dims (); if (m.numel () < 1) return m; if (dim >= dv.ndims ()) dv.resize (dim+1, 1); octave_idx_type ns = dv(dim); octave_idx_type iter = dv.numel () / ns; octave_idx_type stride = 1; for (int i = 0; i < dim; i++) stride *= dv(i); T *v = m.fortran_vec (); const T *ov = data (); octave_sort<T> lsort; if (mode != UNSORTED) lsort.set_compare (mode); else return m; if (stride == 1) { // Special case along first dimension avoids gather/scatter AND directly // sorts into destination buffer for an 11% performance boost. for (octave_idx_type j = 0; j < iter; j++) { // Copy and partition out NaNs. // No need to special case integer types <T> from floating point // types <T> to avoid sort_isnan() test as it makes no discernible // performance impact. octave_idx_type kl = 0; octave_idx_type ku = ns; for (octave_idx_type i = 0; i < ns; i++) { T tmp = ov[i]; if (sort_isnan<T> (tmp)) v[--ku] = tmp; else v[kl++] = tmp; } // sort. lsort.sort (v, kl); if (ku < ns) { // NaNs are in reverse order std::reverse (v + ku, v + ns); if (mode == DESCENDING) std::rotate (v, v + ku, v + ns); } v += ns; ov += ns; } } else { OCTAVE_LOCAL_BUFFER (T, buf, ns); for (octave_idx_type j = 0; j < iter; j++) { octave_idx_type offset = j; octave_idx_type n_strides = j / stride; offset += n_strides * stride * (ns - 1); // gather and partition out NaNs. octave_idx_type kl = 0; octave_idx_type ku = ns; for (octave_idx_type i = 0; i < ns; i++) { T tmp = ov[i*stride + offset]; if (sort_isnan<T> (tmp)) buf[--ku] = tmp; else buf[kl++] = tmp; } // sort. lsort.sort (buf, kl); if (ku < ns) { // NaNs are in reverse order std::reverse (buf + ku, buf + ns); if (mode == DESCENDING) std::rotate (buf, buf + ku, buf + ns); } // scatter. for (octave_idx_type i = 0; i < ns; i++) v[i*stride + offset] = buf[i]; } } return m; } template <typename T, typename Alloc> Array<T, Alloc> Array<T, Alloc>::sort (Array<octave_idx_type>& sidx, int dim, sortmode mode) const { if (dim < 0 || dim >= ndims ()) (*current_liboctave_error_handler) ("sort: invalid dimension"); Array<T, Alloc> m (dims ()); dim_vector dv = m.dims (); if (m.numel () < 1) { sidx = Array<octave_idx_type> (dv); return m; } octave_idx_type ns = dv(dim); octave_idx_type iter = dv.numel () / ns; octave_idx_type stride = 1; for (int i = 0; i < dim; i++) stride *= dv(i); T *v = m.fortran_vec (); const T *ov = data (); octave_sort<T> lsort; sidx = Array<octave_idx_type> (dv); octave_idx_type *vi = sidx.fortran_vec (); if (mode != UNSORTED) lsort.set_compare (mode); else return m; if (stride == 1) { // Special case for dim 1 avoids gather/scatter for performance boost. // See comments in Array::sort (dim, mode). for (octave_idx_type j = 0; j < iter; j++) { // copy and partition out NaNs. octave_idx_type kl = 0; octave_idx_type ku = ns; for (octave_idx_type i = 0; i < ns; i++) { T tmp = ov[i]; if (sort_isnan<T> (tmp)) { --ku; v[ku] = tmp; vi[ku] = i; } else { v[kl] = tmp; vi[kl] = i; kl++; } } // sort. lsort.sort (v, vi, kl); if (ku < ns) { // NaNs are in reverse order std::reverse (v + ku, v + ns); std::reverse (vi + ku, vi + ns); if (mode == DESCENDING) { std::rotate (v, v + ku, v + ns); std::rotate (vi, vi + ku, vi + ns); } } v += ns; vi += ns; ov += ns; } } else { OCTAVE_LOCAL_BUFFER (T, buf, ns); OCTAVE_LOCAL_BUFFER (octave_idx_type, bufi, ns); for (octave_idx_type j = 0; j < iter; j++) { octave_idx_type offset = j; octave_idx_type n_strides = j / stride; offset += n_strides * stride * (ns - 1); // gather and partition out NaNs. octave_idx_type kl = 0; octave_idx_type ku = ns; for (octave_idx_type i = 0; i < ns; i++) { T tmp = ov[i*stride + offset]; if (sort_isnan<T> (tmp)) { --ku; buf[ku] = tmp; bufi[ku] = i; } else { buf[kl] = tmp; bufi[kl] = i; kl++; } } // sort. lsort.sort (buf, bufi, kl); if (ku < ns) { // NaNs are in reverse order std::reverse (buf + ku, buf + ns); std::reverse (bufi + ku, bufi + ns); if (mode == DESCENDING) { std::rotate (buf, buf + ku, buf + ns); std::rotate (bufi, bufi + ku, bufi + ns); } } // scatter. for (octave_idx_type i = 0; i < ns; i++) v[i*stride + offset] = buf[i]; for (octave_idx_type i = 0; i < ns; i++) vi[i*stride + offset] = bufi[i]; } } return m; } template <typename T, typename Alloc> typename Array<T, Alloc>::compare_fcn_type safe_comparator (sortmode mode, const Array<T, Alloc>& /* a */, bool /* allow_chk */) { if (mode == ASCENDING) return octave_sort<T>::ascending_compare; else if (mode == DESCENDING) return octave_sort<T>::descending_compare; else return nullptr; } template <typename T, typename Alloc> sortmode Array<T, Alloc>::issorted (sortmode mode) const { octave_sort<T> lsort; octave_idx_type n = numel (); if (n <= 1) return (mode == UNSORTED) ? ASCENDING : mode; if (mode == UNSORTED) { // Auto-detect mode. compare_fcn_type compare = safe_comparator (ASCENDING, *this, false); if (compare (elem (n-1), elem (0))) mode = DESCENDING; else mode = ASCENDING; } lsort.set_compare (safe_comparator (mode, *this, false)); if (! lsort.issorted (data (), n)) mode = UNSORTED; return mode; } template <typename T, typename Alloc> Array<octave_idx_type> Array<T, Alloc>::sort_rows_idx (sortmode mode) const { Array<octave_idx_type> idx; octave_sort<T> lsort (safe_comparator (mode, *this, true)); octave_idx_type r = rows (); octave_idx_type c = cols (); idx = Array<octave_idx_type> (dim_vector (r, 1)); lsort.sort_rows (data (), idx.fortran_vec (), r, c); return idx; } template <typename T, typename Alloc> sortmode Array<T, Alloc>::is_sorted_rows (sortmode mode) const { octave_sort<T> lsort; octave_idx_type r = rows (); octave_idx_type c = cols (); if (r <= 1 || c == 0) return (mode == UNSORTED) ? ASCENDING : mode; if (mode == UNSORTED) { // Auto-detect mode. compare_fcn_type compare = safe_comparator (ASCENDING, *this, false); octave_idx_type i; for (i = 0; i < cols (); i++) { T l = elem (0, i); T u = elem (rows () - 1, i); if (compare (l, u)) { if (mode == DESCENDING) { mode = UNSORTED; break; } else mode = ASCENDING; } else if (compare (u, l)) { if (mode == ASCENDING) { mode = UNSORTED; break; } else mode = DESCENDING; } } if (mode == UNSORTED && i == cols ()) mode = ASCENDING; } if (mode != UNSORTED) { lsort.set_compare (safe_comparator (mode, *this, false)); if (! lsort.is_sorted_rows (data (), r, c)) mode = UNSORTED; } return mode; } // Do a binary lookup in a sorted array. template <typename T, typename Alloc> octave_idx_type Array<T, Alloc>::lookup (const T& value, sortmode mode) const { octave_idx_type n = numel (); octave_sort<T> lsort; if (mode == UNSORTED) { // auto-detect mode if (n > 1 && lsort.descending_compare (elem (0), elem (n-1))) mode = DESCENDING; else mode = ASCENDING; } lsort.set_compare (mode); return lsort.lookup (data (), n, value); } template <typename T, typename Alloc> Array<octave_idx_type> Array<T, Alloc>::lookup (const Array<T, Alloc>& values, sortmode mode) const { octave_idx_type n = numel (); octave_idx_type nval = values.numel (); octave_sort<T> lsort; Array<octave_idx_type> idx (values.dims ()); if (mode == UNSORTED) { // auto-detect mode if (n > 1 && lsort.descending_compare (elem (0), elem (n-1))) mode = DESCENDING; else mode = ASCENDING; } lsort.set_compare (mode); // This determines the split ratio between the O(M*log2(N)) and O(M+N) // algorithms. static const double ratio = 1.0; sortmode vmode = UNSORTED; // Attempt the O(M+N) algorithm if M is large enough. if (nval > ratio * n / octave::math::log2 (n + 1.0)) { vmode = values.issorted (); // The table must not contain a NaN. if ((vmode == ASCENDING && sort_isnan<T> (values(nval-1))) || (vmode == DESCENDING && sort_isnan<T> (values(0)))) vmode = UNSORTED; } if (vmode != UNSORTED) lsort.lookup_sorted (data (), n, values.data (), nval, idx.fortran_vec (), vmode != mode); else lsort.lookup (data (), n, values.data (), nval, idx.fortran_vec ()); return idx; } template <typename T, typename Alloc> octave_idx_type Array<T, Alloc>::nnz (void) const { const T *src = data (); octave_idx_type nel = numel (); octave_idx_type retval = 0; const T zero = T (); for (octave_idx_type i = 0; i < nel; i++) if (src[i] != zero) retval++; return retval; } template <typename T, typename Alloc> Array<octave_idx_type> Array<T, Alloc>::find (octave_idx_type n, bool backward) const { Array<octave_idx_type> retval; const T *src = data (); octave_idx_type nel = numel (); const T zero = T (); if (n < 0 || n >= nel) { // We want all elements, which means we'll almost surely need // to resize. So count first, then allocate array of exact size. octave_idx_type cnt = 0; for (octave_idx_type i = 0; i < nel; i++) cnt += src[i] != zero; retval.clear (cnt, 1); octave_idx_type *dest = retval.fortran_vec (); for (octave_idx_type i = 0; i < nel; i++) if (src[i] != zero) *dest++ = i; } else { // We want a fixed max number of elements, usually small. So be // optimistic, alloc the array in advance, and then resize if // needed. retval.clear (n, 1); if (backward) { // Do the search as a series of successive single-element searches. octave_idx_type k = 0; octave_idx_type l = nel - 1; for (; k < n; k++) { for (; l >= 0 && src[l] == zero; l--) ; if (l >= 0) retval(k) = l--; else break; } if (k < n) retval.resize2 (k, 1); octave_idx_type *rdata = retval.fortran_vec (); std::reverse (rdata, rdata + k); } else { // Do the search as a series of successive single-element searches. octave_idx_type k = 0; octave_idx_type l = 0; for (; k < n; k++) { for (; l != nel && src[l] == zero; l++) ; if (l != nel) retval(k) = l++; else break; } if (k < n) retval.resize2 (k, 1); } } // Fixup return dimensions, for Matlab compatibility. // find (zeros (0,0)) -> zeros (0,0) // find (zeros (1,0)) -> zeros (1,0) // find (zeros (0,1)) -> zeros (0,1) // find (zeros (0,X)) -> zeros (0,1) // find (zeros (1,1)) -> zeros (0,0) !!!! WHY? // find (zeros (0,1,0)) -> zeros (0,0) // find (zeros (0,1,0,1)) -> zeros (0,0) etc if ((numel () == 1 && retval.isempty ()) || (rows () == 0 && dims ().numel (1) == 0)) retval.m_dimensions = dim_vector (); else if (rows () == 1 && ndims () == 2) retval.m_dimensions = dim_vector (1, retval.numel ()); return retval; } template <typename T, typename Alloc> Array<T, Alloc> Array<T, Alloc>::nth_element (const octave::idx_vector& n, int dim) const { if (dim < 0) (*current_liboctave_error_handler) ("nth_element: invalid dimension"); dim_vector dv = dims (); if (dim >= dv.ndims ()) dv.resize (dim+1, 1); octave_idx_type ns = dv(dim); octave_idx_type nn = n.length (ns); dv(dim) = std::min (nn, ns); dv.chop_trailing_singletons (); dim = std::min (dv.ndims (), static_cast<octave_idx_type> (dim)); Array<T, Alloc> m (dv); if (m.isempty ()) return m; sortmode mode = UNSORTED; octave_idx_type lo = 0; switch (n.idx_class ()) { case octave::idx_vector::class_scalar: mode = ASCENDING; lo = n(0); break; case octave::idx_vector::class_range: { octave_idx_type inc = n.increment (); if (inc == 1) { mode = ASCENDING; lo = n(0); } else if (inc == -1) { mode = DESCENDING; lo = ns - 1 - n(0); } } break; case octave::idx_vector::class_vector: // This case resolves bug #51329, a fallback to allow the given index // to be a sequential vector instead of the typical scalar or range if (n(1) - n(0) == 1) { mode = ASCENDING; lo = n(0); } else if (n(1) - n(0) == -1) { mode = DESCENDING; lo = ns - 1 - n(0); } // Ensure that the vector is actually an arithmetic contiguous sequence for (octave_idx_type i = 2; i < n.length () && mode != UNSORTED; i++) if ((mode == ASCENDING && n(i) - n(i-1) != 1) || (mode == DESCENDING && n(i) - n(i-1) != -1)) mode = UNSORTED; break; default: break; } if (mode == UNSORTED) (*current_liboctave_error_handler) ("nth_element: n must be a scalar or a contiguous range"); octave_idx_type up = lo + nn; if (lo < 0 || up > ns) (*current_liboctave_error_handler) ("nth_element: invalid element index"); octave_idx_type iter = numel () / ns; octave_idx_type stride = 1; for (int i = 0; i < dim; i++) stride *= dv(i); T *v = m.fortran_vec (); const T *ov = data (); OCTAVE_LOCAL_BUFFER (T, buf, ns); octave_sort<T> lsort; lsort.set_compare (mode); for (octave_idx_type j = 0; j < iter; j++) { octave_idx_type kl = 0; octave_idx_type ku = ns; if (stride == 1) { // copy without NaNs. for (octave_idx_type i = 0; i < ns; i++) { T tmp = ov[i]; if (sort_isnan<T> (tmp)) buf[--ku] = tmp; else buf[kl++] = tmp; } ov += ns; } else { octave_idx_type offset = j % stride; // copy without NaNs. for (octave_idx_type i = 0; i < ns; i++) { T tmp = ov[offset + i*stride]; if (sort_isnan<T> (tmp)) buf[--ku] = tmp; else buf[kl++] = tmp; } if (offset == stride-1) ov += ns*stride; } if (ku == ns) lsort.nth_element (buf, ns, lo, up); else if (mode == ASCENDING) lsort.nth_element (buf, ku, lo, std::min (ku, up)); else { octave_idx_type nnan = ns - ku; octave_idx_type zero = 0; lsort.nth_element (buf, ku, std::max (lo - nnan, zero), std::max (up - nnan, zero)); std::rotate (buf, buf + ku, buf + ns); } if (stride == 1) { for (octave_idx_type i = 0; i < nn; i++) v[i] = buf[lo + i]; v += nn; } else { octave_idx_type offset = j % stride; for (octave_idx_type i = 0; i < nn; i++) v[offset + stride * i] = buf[lo + i]; if (offset == stride-1) v += nn*stride; } } return m; } #define NO_INSTANTIATE_ARRAY_SORT_API(T, API) \ template <> API Array<T> \ Array<T>::sort (int, sortmode) const \ { \ return *this; \ } \ template <> API Array<T> \ Array<T>::sort (Array<octave_idx_type> &sidx, int, sortmode) const \ { \ sidx = Array<octave_idx_type> (); \ return *this; \ } \ template <> API sortmode \ Array<T>::issorted (sortmode) const \ { \ return UNSORTED; \ } \ API Array<T>::compare_fcn_type \ safe_comparator (sortmode, const Array<T>&, bool) \ { \ return nullptr; \ } \ template <> API Array<octave_idx_type> \ Array<T>::sort_rows_idx (sortmode) const \ { \ return Array<octave_idx_type> (); \ } \ template <> API sortmode \ Array<T>::is_sorted_rows (sortmode) const \ { \ return UNSORTED; \ } \ template <> API octave_idx_type \ Array<T>::lookup (T const &, sortmode) const \ { \ return 0; \ } \ template <> API Array<octave_idx_type> \ Array<T>::lookup (const Array<T>&, sortmode) const \ { \ return Array<octave_idx_type> (); \ } \ template <> API octave_idx_type \ Array<T>::nnz (void) const \ { \ return 0; \ } \ template <> API Array<octave_idx_type> \ Array<T>::find (octave_idx_type, bool) const \ { \ return Array<octave_idx_type> (); \ } \ template <> API Array<T> \ Array<T>::nth_element (const octave::idx_vector&, int) const { \ return Array<T> (); \ } #define NO_INSTANTIATE_ARRAY_SORT(T) NO_INSTANTIATE_ARRAY_SORT_API (T,) template <typename T, typename Alloc> Array<T, Alloc> Array<T, Alloc>::diag (octave_idx_type k) const { dim_vector dv = dims (); octave_idx_type nd = dv.ndims (); Array<T, Alloc> d; if (nd > 2) (*current_liboctave_error_handler) ("Matrix must be 2-dimensional"); octave_idx_type nnr = dv(0); octave_idx_type nnc = dv(1); if (nnr == 0 && nnc == 0) ; // do nothing for empty matrix else if (nnr != 1 && nnc != 1) { // Extract diag from matrix if (k > 0) nnc -= k; else if (k < 0) nnr += k; if (nnr > 0 && nnc > 0) { octave_idx_type ndiag = (nnr < nnc) ? nnr : nnc; d.resize (dim_vector (ndiag, 1)); if (k > 0) { for (octave_idx_type i = 0; i < ndiag; i++) d.xelem (i) = elem (i, i+k); } else if (k < 0) { for (octave_idx_type i = 0; i < ndiag; i++) d.xelem (i) = elem (i-k, i); } else { for (octave_idx_type i = 0; i < ndiag; i++) d.xelem (i) = elem (i, i); } } else // Matlab returns [] 0x1 for out-of-range diagonal d.resize (dim_vector (0, 1)); } else { // Create diag matrix from vector octave_idx_type roff = 0; octave_idx_type coff = 0; if (k > 0) { roff = 0; coff = k; } else if (k < 0) { roff = -k; coff = 0; } if (nnr == 1) { octave_idx_type n = nnc + std::abs (k); d = Array<T, Alloc> (dim_vector (n, n), resize_fill_value ()); for (octave_idx_type i = 0; i < nnc; i++) d.xelem (i+roff, i+coff) = elem (0, i); } else { octave_idx_type n = nnr + std::abs (k); d = Array<T, Alloc> (dim_vector (n, n), resize_fill_value ()); for (octave_idx_type i = 0; i < nnr; i++) d.xelem (i+roff, i+coff) = elem (i, 0); } } return d; } template <typename T, typename Alloc> Array<T, Alloc> Array<T, Alloc>::diag (octave_idx_type m, octave_idx_type n) const { if (ndims () != 2 || (rows () != 1 && cols () != 1)) (*current_liboctave_error_handler) ("cat: invalid dimension"); Array<T, Alloc> retval (dim_vector (m, n), resize_fill_value ()); octave_idx_type nel = std::min (numel (), std::min (m, n)); for (octave_idx_type i = 0; i < nel; i++) retval.xelem (i, i) = xelem (i); return retval; } template <typename T, typename Alloc> Array<T, Alloc> Array<T, Alloc>::cat (int dim, octave_idx_type n, const Array<T, Alloc> *array_list) { // Default concatenation. bool (dim_vector::*concat_rule) (const dim_vector&, int) = &dim_vector::concat; if (dim == -1 || dim == -2) { concat_rule = &dim_vector::hvcat; dim = -dim - 1; } else if (dim < 0) (*current_liboctave_error_handler) ("cat: invalid dimension"); if (n == 1) return array_list[0]; else if (n == 0) return Array<T, Alloc> (); // Special case: // // cat (dim, [], ..., [], A, ...) // // with dim > 2, A not 0x0, and at least three arguments to // concatenate is equivalent to // // cat (dim, A, ...) // // Note that this check must be performed here because for full-on // braindead Matlab compatibility, we need to have things like // // cat (3, [], [], A) // // succeed, but to have things like // // cat (3, cat (3, [], []), A) // cat (3, zeros (0, 0, 2), A) // // fail. See also bug report #31615. octave_idx_type istart = 0; if (n > 2 && dim > 1) { for (octave_idx_type i = 0; i < n; i++) { dim_vector dv = array_list[i].dims (); if (dv.zero_by_zero ()) istart++; else break; } // Don't skip any initial arguments if they are all empty. if (istart >= n) istart = 0; } dim_vector dv = array_list[istart++].dims (); for (octave_idx_type i = istart; i < n; i++) if (! (dv.*concat_rule) (array_list[i].dims (), dim)) (*current_liboctave_error_handler) ("cat: dimension mismatch"); Array<T, Alloc> retval (dv); if (retval.isempty ()) return retval; int nidx = std::max (dv.ndims (), static_cast<octave_idx_type> (dim + 1)); Array<octave::idx_vector> idxa (dim_vector (nidx, 1), octave::idx_vector::colon); octave_idx_type l = 0; for (octave_idx_type i = 0; i < n; i++) { // NOTE: This takes some thinking, but no matter what the above rules // are, an empty array can always be skipped at this point, because // the result dimensions are already determined, and there is no way // an empty array may contribute a nonzero piece along the dimension // at this point, unless an empty array can be promoted to a non-empty // one (which makes no sense). I repeat, *no way*, think about it. if (array_list[i].isempty ()) continue; octave_quit (); octave_idx_type u; if (dim < array_list[i].ndims ()) u = l + array_list[i].dims ()(dim); else u = l + 1; idxa(dim) = octave::idx_vector (l, u); retval.assign (idxa, array_list[i]); l = u; } return retval; } template <typename T, typename Alloc> void Array<T, Alloc>::print_info (std::ostream& os, const std::string& prefix) const { os << prefix << "m_rep address: " << m_rep << '\n' << prefix << "m_rep->m_len: " << m_rep->m_len << '\n' << prefix << "m_rep->m_data: " << static_cast<void *> (m_rep->m_data) << '\n' << prefix << "m_rep->m_count: " << m_rep->m_count << '\n' << prefix << "m_slice_data: " << static_cast<void *> (m_slice_data) << '\n' << prefix << "m_slice_len: " << m_slice_len << '\n'; // 2D info: // // << pefix << "rows: " << rows () << "\n" // << prefix << "cols: " << cols () << "\n"; } template <typename T, typename Alloc> bool Array<T, Alloc>::optimize_dimensions (const dim_vector& dv) { bool retval = m_dimensions == dv; if (retval) m_dimensions = dv; return retval; } template <typename T, typename Alloc> void Array<T, Alloc>::instantiation_guard () { // This guards against accidental implicit instantiations. // Array<T, Alloc> instances should always be explicit and use INSTANTIATE_ARRAY. T::__xXxXx__ (); } #if defined (__clang__) # define INSTANTIATE_ARRAY(T, API) \ template <> API void \ Array<T>::instantiation_guard () { } \ \ template class API Array<T> #else # define INSTANTIATE_ARRAY(T, API) \ template <> API void \ Array<T>::instantiation_guard () { } \ \ template class Array<T> #endif // FIXME: is this used? template <typename T, typename Alloc> std::ostream& operator << (std::ostream& os, const Array<T, Alloc>& a) { dim_vector a_dims = a.dims (); int n_dims = a_dims.ndims (); os << n_dims << "-dimensional array"; if (n_dims) os << " (" << a_dims.str () << ')'; os <<"\n\n"; if (n_dims) { os << "data:"; Array<octave_idx_type> ra_idx (dim_vector (n_dims, 1), 0); // Number of times the first 2d-array is to be displayed. octave_idx_type m = 1; for (int i = 2; i < n_dims; i++) m *= a_dims(i); if (m == 1) { octave_idx_type rows = 0; octave_idx_type cols = 0; switch (n_dims) { case 2: rows = a_dims(0); cols = a_dims(1); for (octave_idx_type j = 0; j < rows; j++) { ra_idx(0) = j; for (octave_idx_type k = 0; k < cols; k++) { ra_idx(1) = k; os << ' ' << a.elem (ra_idx); } os << "\n"; } break; default: rows = a_dims(0); for (octave_idx_type k = 0; k < rows; k++) { ra_idx(0) = k; os << ' ' << a.elem (ra_idx); } break; } os << "\n"; } else { octave_idx_type rows = a_dims(0); octave_idx_type cols = a_dims(1); for (int i = 0; i < m; i++) { os << "\n(:,:,"; for (int j = 2; j < n_dims - 1; j++) os << ra_idx(j) + 1 << ','; os << ra_idx(n_dims - 1) + 1 << ") = \n"; for (octave_idx_type j = 0; j < rows; j++) { ra_idx(0) = j; for (octave_idx_type k = 0; k < cols; k++) { ra_idx(1) = k; os << ' ' << a.elem (ra_idx); } os << "\n"; } os << "\n"; if (i != m - 1) increment_index (ra_idx, a_dims, 2); } } } return os; }