
// Template array classes
/*

Copyright (C) 1993, 1994, 1995, 1996, 1997, 2000, 2002, 2003, 2004,
              2005, 2006, 2007 John W. Eaton 
Copyright (C) 2008 Jaroslav Hajek <highegg@gmail.com>

This file is part of Octave.

Octave is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

Octave is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with Octave; see the file COPYING.  If not, see
<http://www.gnu.org/licenses/>.

*/

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <cassert>
#include <climits>

#include <iostream>
#include <sstream>
#include <vector>
#include <algorithm>
#include <new>

#include "Array.h"
#include "Array-util.h"
#include "idx-vector.h"
#include "lo-error.h"
#include "oct-locbuf.h"

// One dimensional array class.  Handles the reference counting for
// all the derived classes.

template <class T>
Array<T>::Array (const Array<T>& a, const dim_vector& dv)
  : rep (a.rep), dimensions (dv)
{
  rep->count++;

  if (a.numel () < dv.numel ())
    (*current_liboctave_error_handler)
      ("Array::Array (const Array&, const dim_vector&): dimension mismatch");
}

template <class T>
Array<T>::~Array (void)
{
  if (--rep->count <= 0)
    delete rep;
}

template <class T>
Array<T>&
Array<T>::operator = (const Array<T>& a)
{
  if (this != &a)
    {
      if (--rep->count <= 0)
	delete rep;

      rep = a.rep;
      rep->count++;

      dimensions = a.dimensions;
    }

  return *this;
}
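
// For example (illustrative only): copying an Array just bumps the
// shared rep count, so
//
//   Array<double> a (dim_vector (3, 3));
//   Array<double> b = a;       // a and b share one rep object
//   b.fortran_vec ();          // make_unique detaches b's own copy
//
// The data are duplicated only when a non-const view is requested
// while the reference count is greater than one.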

template <class T>
Array<T>
Array<T>::squeeze (void) const
{
  Array<T> retval = *this;

  if (ndims () > 2)
    {
      bool dims_changed = false;

      dim_vector new_dimensions = dimensions;

      int k = 0;

      for (int i = 0; i < ndims (); i++)
	{
	  if (dimensions(i) == 1)
	    dims_changed = true;
	  else
	    new_dimensions(k++) = dimensions(i);
	}

      if (dims_changed)
	{
	  switch (k)
	    {
	    case 0:
	      new_dimensions = dim_vector (1, 1);
	      break;

	    case 1:
	      {
		octave_idx_type tmp = new_dimensions(0);

		new_dimensions.resize (2);

		new_dimensions(0) = tmp;
		new_dimensions(1) = 1;
	      }
	      break;

	    default:
	      new_dimensions.resize (k);
	      break;
	    }
	}

      // FIXME -- it would be better if we did not have to do
      // this, so we could share the data while still having different
      // dimension vectors.

      retval.make_unique ();

      retval.dimensions = new_dimensions;
    }

  return retval;
}

// KLUGE

// The following get_size functions will throw a std::bad_alloc ()
// exception if the requested size is larger than can be indexed by
// octave_idx_type.  This may be smaller than the actual amount of
// memory that can be safely allocated on a system.  However, if we
// don't fail here, we can end up with a mysterious crash inside a
// function that is iterating over an array using octave_idx_type
// indices.

// A guess (should be quite conservative).
#define MALLOC_OVERHEAD 1024
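
// The frexp trick below: a positive x is split into d * 2^n with
// 0.5 <= d < 1, so products like r * c (* p ...) can be compared
// against the limit (max octave_idx_type minus MALLOC_OVERHEAD,
// divided by sizeof (T)) without overflow, by comparing normalized
// (exponent, mantissa) pairs lexicographically.  For example
// (illustrative, assuming a 32-bit octave_idx_type and T = double):
// the limit is roughly 2^28 elements (nl = 28, dl close to 1), while
// r = c = 32768 gives r * c = 0.5 * 2^31 (nt = 31, dt = 0.5); since
// nt > nl, get_size throws std::bad_alloc.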

template <class T>
octave_idx_type
Array<T>::get_size (octave_idx_type r, octave_idx_type c)
{
  static int nl;
  static double dl
    = frexp (static_cast<double> 
	(std::numeric_limits<octave_idx_type>::max() - MALLOC_OVERHEAD) / sizeof (T), &nl);

  int nr, nc;
  double dr = frexp (static_cast<double> (r), &nr);   // r = dr * 2^nr
  double dc = frexp (static_cast<double> (c), &nc);   // c = dc * 2^nc

  int nt = nr + nc;
  double dt = dr * dc;

  if (dt < 0.5)
    {
      nt--;
      dt *= 2;
    }

  if (nt < nl || (nt == nl && dt < dl))
    return r * c;
  else
    {
      throw std::bad_alloc ();
      return 0;
    }
}

template <class T>
octave_idx_type
Array<T>::get_size (octave_idx_type r, octave_idx_type c, octave_idx_type p)
{
  static int nl;
  static double dl
    = frexp (static_cast<double>
	(std::numeric_limits<octave_idx_type>::max() - MALLOC_OVERHEAD) / sizeof (T), &nl);

  int nr, nc, np;
  double dr = frexp (static_cast<double> (r), &nr);
  double dc = frexp (static_cast<double> (c), &nc);
  double dp = frexp (static_cast<double> (p), &np);

  int nt = nr + nc + np;
  double dt = dr * dc * dp;

  if (dt < 0.5)
    {
      nt--;
      dt *= 2;

      if (dt < 0.5)
	{
	  nt--;
	  dt *= 2;
	}
    }

  if (nt < nl || (nt == nl && dt < dl))
    return r * c * p;
  else
    {
      throw std::bad_alloc ();
      return 0;
    }
}

template <class T>
octave_idx_type
Array<T>::get_size (const dim_vector& ra_idx)
{
  static int nl;
  static double dl
    = frexp (static_cast<double>
	(std::numeric_limits<octave_idx_type>::max() - MALLOC_OVERHEAD) / sizeof (T), &nl);

  int n = ra_idx.length ();

  int nt = 0;
  double dt = 1;

  for (int i = 0; i < n; i++)
    {
      int nra_idx;
      double dra_idx = frexp (static_cast<double> (ra_idx(i)), &nra_idx);

      nt += nra_idx;
      dt *= dra_idx;

      if (dt < 0.5)
	{
	  nt--;
	  dt *= 2;
	}
    }

  if (nt < nl || (nt == nl && dt < dl))
    {
      octave_idx_type retval = 1;

      for (int i = 0; i < n; i++)
	retval *= ra_idx(i);

      return retval;
    }
  else
    {
      throw std::bad_alloc ();
      return 0;
    }
}

#undef MALLOC_OVERHEAD

template <class T>
octave_idx_type
Array<T>::compute_index (const Array<octave_idx_type>& ra_idx) const
{
  octave_idx_type retval = -1;

  int n = dimensions.length ();

  if (n > 0 && n == ra_idx.length ())
    {
      retval = ra_idx(--n);

      while (--n >= 0)
	{
	  retval *= dimensions(n);
	  retval += ra_idx(n);
	}
    }
  else
    (*current_liboctave_error_handler)
      ("Array<T>::compute_index: invalid ra_idxing operation");

  return retval;
}

template <class T>
T
Array<T>::range_error (const char *fcn, octave_idx_type n) const
{
  (*current_liboctave_error_handler) ("%s (%d): range error", fcn, n);
  return T ();
}

template <class T>
T&
Array<T>::range_error (const char *fcn, octave_idx_type n)
{
  (*current_liboctave_error_handler) ("%s (%d): range error", fcn, n);
  static T foo;
  return foo;
}

template <class T>
T
Array<T>::range_error (const char *fcn, octave_idx_type i, octave_idx_type j) const
{
  (*current_liboctave_error_handler)
    ("%s (%d, %d): range error", fcn, i, j);
  return T ();
}

template <class T>
T&
Array<T>::range_error (const char *fcn, octave_idx_type i, octave_idx_type j)
{
  (*current_liboctave_error_handler)
    ("%s (%d, %d): range error", fcn, i, j);
  static T foo;
  return foo;
}

template <class T>
T
Array<T>::range_error (const char *fcn, octave_idx_type i, octave_idx_type j, octave_idx_type k) const
{
  (*current_liboctave_error_handler)
    ("%s (%d, %d, %d): range error", fcn, i, j, k);
  return T ();
}

template <class T>
T&
Array<T>::range_error (const char *fcn, octave_idx_type i, octave_idx_type j, octave_idx_type k)
{
  (*current_liboctave_error_handler)
    ("%s (%d, %d, %d): range error", fcn, i, j, k);
  static T foo;
  return foo;
}

template <class T>
T
Array<T>::range_error (const char *fcn, const Array<octave_idx_type>& ra_idx) const
{
  std::ostringstream buf;

  buf << fcn << " (";

  octave_idx_type n = ra_idx.length ();

  if (n > 0)
    buf << ra_idx(0);

  for (octave_idx_type i = 1; i < n; i++)
    buf << ", " << ra_idx(i);

  buf << "): range error";

  std::string buf_str = buf.str ();

  (*current_liboctave_error_handler) (buf_str.c_str ());

  return T ();
}

template <class T>
T&
Array<T>::range_error (const char *fcn, const Array<octave_idx_type>& ra_idx)
{
  std::ostringstream buf;

  buf << fcn << " (";

  octave_idx_type n = ra_idx.length ();

  if (n > 0)
    buf << ra_idx(0);

  for (octave_idx_type i = 1; i < n; i++)
    buf << ", " << ra_idx(i);

  buf << "): range error";

  std::string buf_str = buf.str ();

  (*current_liboctave_error_handler) (buf_str.c_str ());

  static T foo;
  return foo;
}

template <class T>
Array<T>
Array<T>::reshape (const dim_vector& new_dims) const
{
  Array<T> retval;

  if (dimensions != new_dims)
    {
      if (dimensions.numel () == new_dims.numel ())
	retval = Array<T> (*this, new_dims);
      else
	(*current_liboctave_error_handler) ("reshape: size mismatch");
    }
  else
    retval = *this;

  return retval;
}



template <class T>
Array<T>
Array<T>::permute (const Array<octave_idx_type>& perm_vec_arg, bool inv) const
{
  Array<T> retval;

  Array<octave_idx_type> perm_vec = perm_vec_arg;

  dim_vector dv = dims ();
  dim_vector dv_new;

  int perm_vec_len = perm_vec.length ();

  if (perm_vec_len < dv.length ())
    (*current_liboctave_error_handler)
      ("%s: invalid permutation vector", inv ? "ipermute" : "permute");

  dv_new.resize (perm_vec_len);

  // Append singleton dimensions as needed.
  dv.resize (perm_vec_len, 1);

  // Need this array to check for identical elements in permutation array.
  Array<bool> checked (perm_vec_len, false);

  // Find dimension vector of permuted array.
  for (int i = 0; i < perm_vec_len; i++)
    {
      octave_idx_type perm_elt = perm_vec.elem (i);

      if (perm_elt >= perm_vec_len || perm_elt < 0)
	{
	  (*current_liboctave_error_handler)
	    ("%s: permutation vector contains an invalid element",
	     inv ? "ipermute" : "permute");

	  return retval;
	}

      if (checked.elem(perm_elt))
	{
	  (*current_liboctave_error_handler)
	    ("%s: permutation vector cannot contain identical elements",
	     inv ? "ipermute" : "permute");

	  return retval;
	}
      else
	checked.elem(perm_elt) = true;

      dv_new(i) = dv(perm_elt);
    }

  int nd = dv.length ();

  // FIXME -- it would be nice to have a sort method in the
  // Array class that also returns the sort indices.

  if (inv)
    {
      OCTAVE_LOCAL_BUFFER (permute_vector, pvec, nd);

      for (int i = 0; i < nd; i++)
	{
	  pvec[i].pidx = perm_vec(i);
	  pvec[i].iidx = i;
	}

      octave_qsort (pvec, static_cast<size_t> (nd),
		    sizeof (permute_vector), permute_vector_compare);

      for (int i = 0; i < nd; i++)
	{
	  perm_vec(i) = pvec[i].iidx;
	  dv_new(i) = dv(perm_vec(i));
	}
    }

  retval.resize (dv_new);

  if (numel () > 0)
    {
      Array<octave_idx_type> cp (nd+1, 1);
      for (octave_idx_type i = 1; i < nd+1; i++)
	cp(i) = cp(i-1) * dv(i-1);

      octave_idx_type incr = cp(perm_vec(0));

      Array<octave_idx_type> base_delta (nd-1, 0);
      Array<octave_idx_type> base_delta_max (nd-1);
      Array<octave_idx_type> base_incr (nd-1);
      for (octave_idx_type i = 0; i < nd-1; i++)
	{
	  base_delta_max(i) = dv_new(i+1);
	  base_incr(i) = cp(perm_vec(i+1));
	}

      octave_idx_type nr_new = dv_new(0);
      octave_idx_type nel_new = dv_new.numel ();
      octave_idx_type n = nel_new / nr_new;

      octave_idx_type k = 0;

      for (octave_idx_type i = 0; i < n; i++)
	{
	  octave_idx_type iidx = 0;
	  for (octave_idx_type kk = 0; kk < nd-1; kk++)
	    iidx += base_delta(kk) * base_incr(kk);

	  for (octave_idx_type j = 0; j < nr_new; j++)
	    {
	      OCTAVE_QUIT;

	      retval(k++) = elem(iidx);
	      iidx += incr;
	    }

	  base_delta(0)++;

	  for (octave_idx_type kk = 0; kk < nd-2; kk++)
	    {
	      if (base_delta(kk) == base_delta_max(kk))
		{
		  base_delta(kk) = 0;
		  base_delta(kk+1)++;
		}
	    }
	}
    }

  retval.chop_trailing_singletons ();

  return retval;
}

// Helper class for multi-d index reduction and recursive indexing/indexed assignment.
// Rationale: we could avoid recursion using a state machine instead. However, using
// recursion is much more amenable to possible parallelization in the future.
// Also, the recursion solution is cleaner and more understandable.
class rec_index_helper
{
  octave_idx_type *dim, *cdim;
  idx_vector *idx;
  int top;

public:
  rec_index_helper (const dim_vector& dv, const Array<idx_vector>& ia)
    {
      int n = ia.length ();
      assert (n > 0 && (dv.length () == std::max (n, 2)));

      dim = new octave_idx_type [2*n];
      // A hack to avoid double allocation
      cdim = dim + n;
      idx = new idx_vector [n];
      top = 0;

      dim[0] = dv(0);
      cdim[0] = 1;
      idx[0] = ia(0);

      for (int i = 1; i < n; i++)
        {
          // Try reduction...
          if (idx[top].maybe_reduce (dim[top], ia(i), dv(i)))
            {
              // Reduction successful, fold dimensions.
              dim[top] *= dv(i);
            }
          else
            {
              // Unsuccessful, store index & cumulative dim.
              top++;
              idx[top] = ia(i);
              dim[top] = dv(i);
              cdim[top] = cdim[top-1] * dim[top-1];
            } 
        }
    }

  ~rec_index_helper (void) { delete [] idx; delete [] dim; }

private:

  // Recursive N-d indexing
  template <class T>
  T *do_index (const T *src, T *dest, int lev) const
    {
      if (lev == 0)
        dest += idx[0].index (src, dim[0], dest);
      else
        {
          octave_idx_type n = idx[lev].length (dim[lev]), d = cdim[lev];
          for (octave_idx_type i = 0; i < n; i++)
            dest = do_index (src + d*idx[lev].xelem (i), dest, lev-1);
        }

      return dest;
    }
  
  // Recursive N-d indexed assignment
  template <class T>
  const T *do_assign (const T *src, T *dest, int lev) const
    {
      if (lev == 0)
        src += idx[0].assign (src, dim[0], dest);
      else
        {
          octave_idx_type n = idx[lev].length (dim[lev]), d = cdim[lev];
          for (octave_idx_type i = 0; i < n; i++)
            src = do_assign (src, dest + d*idx[lev].xelem (i), lev-1);
        }

      return src;
    }

  // Recursive N-d indexed fill
  template <class T>
  void do_fill (const T& val, T *dest, int lev) const
    {
      if (lev == 0)
        idx[0].fill (val, dim[0], dest);
      else
        {
          octave_idx_type n = idx[lev].length (dim[lev]), d = cdim[lev];
          for (octave_idx_type i = 0; i < n; i++)
            do_fill (val, dest + d*idx[lev].xelem (i), lev-1);
        }
    }

public:

  template <class T>
  void index (const T *src, T *dest) const { do_index (src, dest, top); }

  template <class T>
  void assign (const T *src, T *dest) const { do_assign (src, dest, top); }

  template <class T>
  void fill (const T& val, T *dest) const { do_fill (val, dest, top); }

};
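
// For example (roughly): indexing a 3x4x5 array as A(:,:,I) first
// folds the two leading colons into a single colon over 3*4 = 12
// elements (maybe_reduce succeeds), so each leaf call to
// idx[0].index copies one contiguous 12-element slab per element of
// I.  The recursion depth therefore equals the number of index
// positions that could not be folded into their predecessor.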

// Helper class for multi-d recursive resizing
// This handles resize () in an efficient manner, touching memory only
// once (apart from reinitialization)
class rec_resize_helper
{
  octave_idx_type *cext, *sext, *dext;
  int n;

public:
  rec_resize_helper (const dim_vector& ndv, const dim_vector& odv)
    {
      int l = ndv.length ();
      assert (odv.length () == l);
      octave_idx_type ld = 1;
      int i = 0;
      for (; i < l-1 && ndv(i) == odv(i); i++) ld *= ndv(i);
      n = l - i;
      cext = new octave_idx_type[3*n];
      // Trick to avoid three allocations
      sext = cext + n;
      dext = sext + n;

      octave_idx_type sld = ld, dld = ld;
      for (int j = 0; j < n; j++)
        {
          cext[j] = std::min (ndv(i+j), odv(i+j));
          sext[j] = sld *= odv(i+j);
          dext[j] = dld *= ndv(i+j);
        }
      cext[0] *= ld;
    }

  ~rec_resize_helper (void) { delete [] cext; }

private:
  // recursive resizing
  template <class T>
  void do_resize_fill (const T* src, T *dest, const T& rfv, int lev) const
    {
      if (lev == 0)
        {
          T* destc = std::copy (src, src + cext[0], dest);
          std::fill (destc, dest + dext[0], rfv);
        }
      else
        {
          octave_idx_type sd = sext[lev-1], dd = dext[lev-1], k;
          for (k = 0; k < cext[lev]; k++)
            do_resize_fill (src + k * sd, dest + k * dd, rfv, lev - 1);

          std::fill (dest + k * dd, dest + dext[lev], rfv);
        }
    }
public:
  template <class T>
  void resize_fill (const T* src, T *dest, const T& rfv) const 
    { do_resize_fill (src, dest, rfv, n-1); }

};
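
// Worked example (illustrative): resizing a 4x3 array to 4x5.  The
// unchanged leading dimension folds into ld = 4 and only one level
// remains (n = 1), with cext[0] = min (3, 5) * 4 = 12, sext[0] = 12
// and dext[0] = 20.  do_resize_fill then copies the 12 existing
// elements in one go and fills the remaining 8 with rfv.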

static void gripe_index_out_of_range (void)
{
  (*current_liboctave_error_handler)
    ("A(I): Index exceeds matrix dimension.");
}

template <class T>
Array<T>
Array<T>::index (const idx_vector& i) const
{
  octave_idx_type n = numel ();
  Array<T> retval;

  if (i.is_colon ())
    {
      // A(:) produces a shallow copy as a column vector.
      retval.dimensions = dim_vector (n, 1);
      rep->count++;
      retval.rep = rep;
    }
  else if (i.extent (n) != n)
    {
      gripe_index_out_of_range ();
    }
  else
    {
      // FIXME -- this is the only place where orig_dimensions are used.
      dim_vector rd = i.orig_dimensions ();
      octave_idx_type il = i.length (n);

      // FIXME -- this is for Matlab compatibility.  Matlab 2007, given
      //
      //   b = ones(3,1)
      //
      // yields the following:
      //
      //   b(zeros(0,0)) gives []
      //   b(zeros(1,0)) gives zeros(0,1)
      //   b(zeros(0,1)) gives zeros(0,1)
      //   b(zeros(0,m)) gives zeros(0,m)
      //   b(zeros(m,0)) gives zeros(m,0)
      //   b(1:2) gives ones(2,1)
      //   b(ones(2)) gives ones(2) etc.
      //
      // As you can see, the behaviour is weird, but the tests end up pretty
      // simple.  Nah, I don't want to suggest that this is ad hoc :)

      if (ndims () == 2 && n != 1)
        {
          if (columns () == 1 && rd(0) == 1)
            rd = dim_vector (il, 1);
          else if (rows () == 1 && rd(1) == 1)
            rd = dim_vector (1, il);
        }

      // Don't use resize here to avoid useless initialization for POD
      // types.
      retval = Array<T> (rd);

      if (il != 0)
        i.index (data (), n, retval.fortran_vec ());
    }

  return retval;
}
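
// For example, with b = ones (3, 1) as in the table above, indexing
// b with the 1x2 index [0 1] (b(1:2) in the interpreter) takes the
// column-vector branch and yields a 2x1 result, matching the
// "b(1:2) gives ones(2,1)" case.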

template <class T>
Array<T>
Array<T>::index (const idx_vector& i, const idx_vector& j) const
{
  // Get dimensions, allowing Fortran indexing in the 2nd dim.
  dim_vector dv = dimensions.redim (2);
  octave_idx_type r = dv(0), c = dv(1);
  Array<T> retval;

  if (i.is_colon () && j.is_colon ())
    {
      // A(:,:) produces a shallow copy.
      retval = Array<T> (*this, dv);
    }
  else if (i.extent (r) != r || j.extent (c) != c)
    {
      gripe_index_out_of_range ();
    }
  else
    {
      octave_idx_type n = numel (), il = i.length (r), jl = j.length (c);

      // Don't use resize here to avoid useless initialization for POD types.
      retval = Array<T> (dim_vector (il, jl));

      idx_vector ii (i);

      const T* src = data ();
      T *dest = retval.fortran_vec ();

      if (ii.maybe_reduce (r, j, c))
        ii.index (src, n, dest);
      else
        {
          for (octave_idx_type k = 0; k < jl; k++)
            dest += i.index (src + r * j.xelem (k), r, dest);
        }
    }

  return retval;
}

template <class T>
Array<T>
Array<T>::index (const Array<idx_vector>& ia) const
{
  int ial = ia.length ();
  Array<T> retval;

  // FIXME: is this dispatching necessary?
  if (ial == 1)
    retval = index (ia(0));
  else if (ial == 2)
    retval = index (ia(0), ia(1));
  else if (ial > 0)
    {
      // Get dimensions, allowing Fortran indexing in the last dim.
      dim_vector dv = dimensions.redim (ial);

      // Check for out of bounds conditions.
      bool all_colons = true, mismatch = false;
      for (int i = 0; i < ial; i++)
        {
          if (ia(i).extent (dv(i)) != dv(i))
            {
              mismatch = true;
              break;
            }
          else
            all_colons = all_colons && ia(i).is_colon ();
        }


      if (mismatch)
        {
          gripe_index_out_of_range ();
        }
      else if (all_colons)
        {
          // A(:,:,...,:) produces a shallow copy.
          retval = Array<T> (*this, dv);
        }
      else 
        {
          // Form result dimensions.
          dim_vector rdv;
          rdv.resize (ial);
          for (int i = 0; i < ial; i++) rdv(i) = ia(i).length (dv(i));
          rdv.chop_trailing_singletons ();

          // Don't use resize here to avoid useless initialization for POD types.
          retval = Array<T> (rdv);

          // Prepare for recursive indexing
          rec_index_helper rh (dv, ia);

          // Do it.
          rh.index (data (), retval.fortran_vec ());
        }
    }

  return retval;
}

// FIXME -- the following is a common error message for resize,
// regardless of whether it is called from assign or elsewhere.  It
// seems OK to me, but eventually the gripe can be specialized.
// Anyway, propagating various error messages into the calling
// procedure is, IMHO, nonsense.  If anything, we should change error
// handling here (and throughout liboctave) to allow custom handling
// of errors.
static void gripe_invalid_resize (void)
{
  (*current_liboctave_error_handler)
    ("resize: Invalid resizing operation or ambiguous assignment to an out-of-bounds array element.");
}

// The default fill value.  Override if you want a different one.

template <class T>
T Array<T>::resize_fill_value ()
{
  return T ();
}
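
// A specialization can supply a different default; for instance
// (illustrative sketch only, not something defined in this file):
//
//   template <>
//   char Array<char>::resize_fill_value (void) { return ' '; }
//
// would make resizing a char array pad with spaces instead of the
// default-constructed '\0'.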

// Yes, we could do resize using index & assign.  However, that would
// possibly involve a lot more memory traffic than we actually need.

template <class T>
void
Array<T>::resize_fill (octave_idx_type n, const T& rfv)
{
  if (n >= 0 && ndims () == 2)
    {
      dim_vector dv;
      // This is driven by Matlab's behaviour of giving a *row* vector
      // on some out-of-bounds assignments.  Specifically, Matlab
      // allows a(i) with out-of-bounds i when a is any of 0x0, 1x0,
      // 1x1, 0xN, and gives a row vector in all cases (yes, even the
      // last one, search me why).  Giving a column vector would make
      // much more sense (given the way trailing singleton dims are
      // treated).
      bool invalid = false;
      if (rows () == 0 || rows () == 1)
        dv = dim_vector (1, n);          
      else if (columns () == 1)
        dv = dim_vector (n, 1);
      else
         invalid = true;
        
      if (invalid)
        gripe_invalid_resize ();
      else
        {
          octave_idx_type nx = numel ();
          if (n != nx)
            {
              Array<T> tmp = Array<T> (dv);
              T *dest = tmp.fortran_vec ();

              octave_idx_type n0 = std::min (n, nx), n1 = n - n0;
              dest = std::copy (data (), data () + n0, dest);
              std::fill (dest, dest + n1, rfv);

              *this = tmp;
            }
        }
    }
  else
    gripe_invalid_resize ();
}
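
// For example: resizing a 1x3 row to n = 5 yields a 1x5 row whose
// last two elements are rfv; a 3x1 column becomes a 5x1 column; and
// resizing a 2x2 matrix this way is ambiguous, so it gripes.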

template <class T>
void
Array<T>::resize_fill (octave_idx_type r, octave_idx_type c, const T& rfv)
{
  if (r >= 0 && c >= 0 && ndims () == 2)
    {
      octave_idx_type rx = rows (), cx = columns ();
      if (r != rx || c != cx)
        {
          Array<T> tmp = Array<T> (dim_vector (r, c));
          T *dest = tmp.fortran_vec ();

          octave_idx_type r0 = std::min (r, rx), r1 = r - r0;
          octave_idx_type c0 = std::min (c, cx), c1 = c - c0;
          const T *src = data ();
          if (r == rx)
            dest = std::copy (src, src + r * c0, dest);
          else
            {
              for (octave_idx_type k = 0; k < c0; k++)
                {
                  dest = std::copy (src, src + r0, dest);
                  src += rx;
                  std::fill (dest, dest + r1, rfv);
                  dest += r1;
                }
            }

          std::fill (dest, dest + r * c1, rfv);

          *this = tmp;
        }
    }
  else
    gripe_invalid_resize ();

}

template<class T>
void
Array<T>::resize_fill (const dim_vector& dv, const T& rfv)
{
  int dvl = dv.length ();
  if (dvl == 1)
    resize (dv(0), rfv);
  else if (dvl == 2)
    resize (dv(0), dv(1), rfv);
  else if (dimensions != dv)
    {
      if (dimensions.length () <= dvl)
        {
          Array<T> tmp (dv);
          // Prepare for recursive resizing.
          rec_resize_helper rh (dv, dimensions.redim (dvl));

          // Do it.
          rh.resize_fill (data (), tmp.fortran_vec (), rfv);   
          *this = tmp;
        }
      else
        gripe_invalid_resize ();
    }
}

template <class T>
Array<T> 
Array<T>::index (const idx_vector& i, bool resize_ok, const T& rfv) const
{
  Array<T> tmp = *this;
  if (resize_ok)
    {
      octave_idx_type n = numel (), nx = i.extent (n);
      if (n != nx)
        tmp.resize_fill (nx, rfv);

      if (tmp.numel () != nx)
        return Array<T> ();
    }

  return tmp.index (i);
}

template <class T>
Array<T> 
Array<T>::index (const idx_vector& i, const idx_vector& j, 
                 bool resize_ok, const T& rfv) const
{
  Array<T> tmp = *this;
  if (resize_ok)
    {
      dim_vector dv = dimensions.redim (2);
      octave_idx_type r = dv(0), c = dv(1);
      octave_idx_type rx = i.extent (r), cx = j.extent (c);
      if (r != rx || c != cx)
        tmp.resize_fill (rx, cx, rfv);

      if (tmp.rows () != rx || tmp.columns () != cx)
        return Array<T> ();
    }

  return tmp.index (i, j);  
}

template <class T>
Array<T> 
Array<T>::index (const Array<idx_vector>& ia,
                 bool resize_ok, const T& rfv) const
{
  Array<T> tmp = *this;
  if (resize_ok)
    {
      int ial = ia.length ();
      dim_vector dv = dimensions.redim (ial);
      dim_vector dvx; dvx.resize (ial);
      for (int i = 0; i < ial; i++) dvx(i) = ia(i).extent (dv (i));
      if (! (dvx == dv))
        tmp.resize_fill (dvx, rfv);

      if (tmp.dimensions != dvx)
        return Array<T> ();
    }

  return tmp.index (ia);  
}
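
// For example: with resize_ok, indexing a 3-element vector by an
// index whose extent is 5 first pads a temporary copy to 5 elements
// with rfv and then indexes that copy; the original array itself is
// left untouched.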


static void 
gripe_invalid_assignment_size (void)
{
  (*current_liboctave_error_handler)
    ("A(I) = X: X must have the same size as I");
}

static void
gripe_assignment_dimension_mismatch (void)
{
  (*current_liboctave_error_handler)
    ("A(I,J,...) = X: dimensions mismatch");
}

template <class T>
void
Array<T>::assign (const idx_vector& i, const Array<T>& rhs, const T& rfv)
{
  octave_idx_type n = numel (), rhl = rhs.numel ();

  if (rhl == 1 || i.length (n) == rhl)
    {
      octave_idx_type nx = i.extent (n);
      // Try to resize first if necessary. 
      if (nx != n)
        {
          resize_fill (nx, rfv);      
          n = numel ();
        }

      // If the resizing was ambiguous, resize has already griped.
      if (nx == n)
        {
          if (i.is_colon ())
            {
              // A(:) = X makes a full fill or a shallow copy.
              if (rhl == 1)
                fill (rhs(0));
              else
                *this = rhs.reshape (dimensions);
            }
          else
            {
              if (rhl == 1)
                i.fill (rhs(0), n, fortran_vec ());
              else
                i.assign (rhs.data (), n, fortran_vec ());
            }
        }
    }
  else
    gripe_invalid_assignment_size ();
}

template <class T>
void
Array<T>::assign (const idx_vector& i, const idx_vector& j,
                  const Array<T>& rhs, const T& rfv)
{
  // Get RHS extents, discarding singletons.
  dim_vector rhdv = rhs.dims (); 
  // Get LHS extents, allowing Fortran indexing in the second dim.
  dim_vector dv = dimensions.redim (2);
  // Check for out-of-bounds and form resizing dimensions.
  dim_vector rdv; 
  // In the special case when all dimensions are zero, colons are
  // allowed to inquire the shape of the RHS.  The rules are more
  // obscure, so we solve that elsewhere.
  if (dv.all_zero ())
    rdv = zero_dims_inquire (i, j, rhdv);
  else
    {
      rdv(0) = i.extent (dv(0));
      rdv(1) = j.extent (dv(1));
    }

  bool isfill = rhs.numel () == 1;
  octave_idx_type il = i.length (rdv(0)), jl = j.length (rdv(1));
  rhdv.chop_all_singletons ();
  bool match = (isfill
		|| (rhdv.length () == 2 && il == rhdv(0) && jl == rhdv(1)));
  match = match || (il == 1 && jl == rhdv(0) && rhdv(1) == 1);

  if (match)
    {
      // Resize if requested.
      if (rdv != dv)
        {
          resize (rdv, rfv);
          dv = dimensions;
        }

      // If the resizing was invalid, resize has already griped.
      if (dv == rdv)
        {
          if (i.is_colon () && j.is_colon ())
            {
              // A(:,:) = X makes a full fill or a shallow copy
              if (isfill)
                fill (rhs(0));
              else
                *this = rhs.reshape (dimensions);
            }
          else
            {
              // The actual work.
              octave_idx_type n = numel (), r = rows (), c = columns ();
              idx_vector ii (i);

              const T* src = rhs.data ();
              T *dest = fortran_vec ();

              // Try reduction first.
              if (ii.maybe_reduce (r, j, c))
                {
                  if (isfill)
                    ii.fill (*src, n, dest);
                  else
                    ii.assign (src, n, dest);
                }
              else
                {
                  if (isfill)
                    {
                      for (octave_idx_type k = 0; k < jl; k++)
                        i.fill (*src, r, dest + r * j.xelem (k));
                    }
                  else
                    {
                      for (octave_idx_type k = 0; k < jl; k++)
                        src += i.assign (src, r, dest + r * j.xelem (k));
                    }
                }
            }
          
        }

    }
  else
    gripe_assignment_dimension_mismatch ();
}

template <class T>
void
Array<T>::assign (const Array<idx_vector>& ia,
                  const Array<T>& rhs, const T& rfv)
{
  int ial = ia.length ();

  // FIXME: is this dispatching necessary / desirable?
  if (ial == 1)
    assign (ia(0), rhs, rfv);
  else if (ial == 2)
    assign (ia(0), ia(1), rhs, rfv);
  else if (ial > 0)
    {
      // Get RHS extents, discarding singletons.
      dim_vector rhdv = rhs.dims ();

      // Get LHS extents, allowing Fortran indexing in the last dim.
      dim_vector dv = dimensions.redim (ial);
      
      // Get the extents forced by indexing. 
      dim_vector rdv;

      // In the special case when all dimensions are zero, colons are
      // allowed to inquire the shape of the RHS.  The rules are more
      // obscure, so we solve that elsewhere.
      if (dv.all_zero ())
        rdv = zero_dims_inquire (ia, rhdv);
      else
        {
          rdv.resize (ial);
          for (int i = 0; i < ial; i++)
            rdv(i) = ia(i).extent (dv(i));
        }

      // Check whether LHS and RHS match, up to singleton dims.
      bool match = true, all_colons = true, isfill = rhs.numel () == 1;

      rhdv.chop_all_singletons ();
      int j = 0, rhdvl = rhdv.length ();
      for (int i = 0; i < ial; i++)
        {
          all_colons = all_colons && ia(i).is_colon ();
          octave_idx_type l = ia(i).length (rdv(i));
          if (l == 1) continue;
          match = match && j < rhdvl && l == rhdv(j++);
        }

      match = match && (j == rhdvl || rhdv(j) == 1);
      match = match || isfill;
            
      if (match)
        {
          // Resize first if necessary.
          if (rdv != dv)
            {
              resize_fill (rdv, rfv);
              dv = dimensions;
              chop_trailing_singletons ();
            }

          // If the resizing was invalid, resize has already griped.
          if (dv == rdv)
            {
              if (all_colons)
                {
                  // A(:,:,...,:) = X makes a full fill or a shallow copy.
                  if (isfill)
                    fill (rhs(0));
                  else
                    *this = rhs.reshape (dimensions);
                }
              else
                {
                  // Do the actual work.

                  // Prepare for recursive indexing
                  rec_index_helper rh (dv, ia);

                  // Do it.
                  if (isfill)
                    rh.fill (rhs(0), fortran_vec ());
                  else
                    rh.assign (rhs.data (), fortran_vec ());
                }
            }
        }
      else 
        gripe_assignment_dimension_mismatch ();
    }
}
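
// For example (illustrative): assigning a 1x3 RHS to A(2, :, 2) of a
// 3x3x3 array matches, because after chopping singletons the only
// non-singleton index length (3, from the colon) lines up with the
// only non-singleton RHS dimension (3); a 1x2 RHS would gripe.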

template <class T>
void 
Array<T>::delete_elements (const idx_vector& i)
{
  octave_idx_type n = numel ();
  if (i.is_colon ())
    { 
      *this = Array<T> ();
    }
  else if (i.extent (n) != n)
    {
      gripe_index_out_of_range ();
    }
  else if (i.length (n) != 0)
    {
      octave_idx_type l, u;

      bool col_vec = ndims () == 2 && columns () == 1 && rows () != 1;
      if (i.is_cont_range (n, l, u))
        {
          // Special case deleting a contiguous range.
          octave_idx_type m = n + l - u;
          Array<T> tmp (dim_vector (col_vec ? m : 1, !col_vec ? m : 1));
          const T *src = data ();
          T *dest = tmp.fortran_vec ();
          dest = std::copy (src, src + l, dest);
          dest = std::copy (src + u, src + n, dest);
          *this = tmp;
        }
      else
        {
          // Use index.
          *this = index (i.complement (n));
        }
    }
}
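
// For example: deleting the contiguous 0-based range [l, u) = [2, 4)
// from a 1x6 row keeps elements 0-1 and 4-5, giving a 1x4 row; the
// two std::copy calls above move exactly those two runs.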

template <class T>
void 
Array<T>::delete_elements (int dim, const idx_vector& i)
{
  if (dim < 0 || dim >= ndims ())
    {
      (*current_liboctave_error_handler)
        ("invalid dimension in delete_elements");
      return;
    }

  octave_idx_type n = dimensions (dim);
  if (i.is_colon ())
    { 
      *this = Array<T> ();
    }
  else if (i.extent (n) != n)
    {
      gripe_index_out_of_range ();
    }
  else if (i.length (n) != 0)
    {
      octave_idx_type l, u;

      if (i.is_cont_range (n, l, u))
        {
          // Special case deleting a contiguous range.
          octave_idx_type nd = n + l - u, dl = 1, du = 1;
          dim_vector rdv = dimensions;
          rdv(dim) = nd;
          for (int k = 0; k < dim; k++) dl *= dimensions(k);
          for (int k = dim + 1; k < ndims (); k++) du *= dimensions(k);

          Array<T> tmp = Array<T> (rdv);
          const T *src = data ();
          T *dest = tmp.fortran_vec ();
          l *= dl; u *= dl; n *= dl;
          for (octave_idx_type k = 0; k < du; k++)
            {
              dest = std::copy (src, src + l, dest);
              dest = std::copy (src + u, src + n, dest);
              src += n;
            }

          *this = tmp;
        }
      else
        {
          // Use index.
          Array<idx_vector> ia (ndims (), idx_vector::colon);
          ia (dim) = i.complement (n);
          *this = index (ia);
        }
    }
}

template <class T>
void 
Array<T>::delete_elements (const Array<idx_vector>& ia)
{
  if (ia.length () == 1)
    delete_elements (ia(0));
  else
    {
      int len = ia.length (), k, dim = -1;
      for (k = 0; k < len; k++)
        {
          if (! ia(k).is_colon ())
            {
              if (dim < 0)
                dim = k;
              else
                break;
            }
        }
      if (dim < 0)
        {
          dim_vector dv = dimensions;
          dv(0) = 0;
          *this = Array<T> (dv);
        }
      else if (k == len)
        {
          delete_elements (dim, ia(dim));
        }
      else
        {
          (*current_liboctave_error_handler)
            ("A null assignment can only have one non-colon index.");
        }
    }

}

// FIXME: Remove these methods or implement them using assign.

template <class T>
Array<T>&
Array<T>::insert (const Array<T>& a, octave_idx_type r, octave_idx_type c)
{
  if (ndims () == 2 && a.ndims () == 2)
    insert2 (a, r, c);
  else
    insertN (a, r, c);

  return *this;
}


template <class T>
Array<T>&
Array<T>::insert2 (const Array<T>& a, octave_idx_type r, octave_idx_type c)
{
  octave_idx_type a_rows = a.rows ();
  octave_idx_type a_cols = a.cols ();

  if (r < 0 || r + a_rows > rows () || c < 0 || c + a_cols > cols ())
    {
      (*current_liboctave_error_handler) ("range error for insert");
      return *this;
    }

  for (octave_idx_type j = 0; j < a_cols; j++)
    for (octave_idx_type i = 0; i < a_rows; i++)
      elem (r+i, c+j) = a.elem (i, j);

  return *this;
}

template <class T>
Array<T>&
Array<T>::insertN (const Array<T>& a, octave_idx_type r, octave_idx_type c)
{
  dim_vector dv = dims ();

  dim_vector a_dv = a.dims ();

  int n = a_dv.length ();

  if (n == dimensions.length ())
    {
      Array<octave_idx_type> a_ra_idx (a_dv.length (), 0);

      a_ra_idx.elem (0) = r;
      a_ra_idx.elem (1) = c;

      for (int i = 0; i < n; i++)
	{
	  if (a_ra_idx(i) < 0 || (a_ra_idx(i) + a_dv(i)) > dv(i))
	    {
	      (*current_liboctave_error_handler)
		("Array<T>::insert: range error for insert");
	      return *this;
	    }
	}

      octave_idx_type n_elt = a.numel ();
      
      const T *a_data = a.data ();   
   
      octave_idx_type iidx = 0;
	  
      octave_idx_type a_rows = a_dv(0);

      octave_idx_type this_rows = dv(0);
	  
      octave_idx_type numel_page = a_dv(0) * a_dv(1);	  

      octave_idx_type count_pages = 0;
	  
      for (octave_idx_type i = 0; i < n_elt; i++)
	{
	  if (i != 0 && i % a_rows == 0)
	    iidx += (this_rows - a_rows);	      
	  
	  if (i % numel_page == 0)
	    iidx = c * dv(0) + r + dv(0) * dv(1) * count_pages++;

	  elem (iidx++) = a_data[i];
	}
    }
  else
    (*current_liboctave_error_handler)
      ("Array<T>::insert: invalid indexing operation");

  return *this;
}

template <class T>
Array<T>&
Array<T>::insert (const Array<T>& a, const Array<octave_idx_type>& ra_idx)
{
  octave_idx_type n = ra_idx.length ();

  if (n == dimensions.length ())
    {
      dim_vector dva = a.dims ();
      dim_vector dv = dims ();
      int len_a = dva.length ();
      int non_full_dim = 0;

      for (octave_idx_type i = 0; i < n; i++)
	{
	  if (ra_idx(i) < 0 || (ra_idx(i) + 
				(i < len_a ? dva(i) : 1)) > dimensions(i))
	    {
	      (*current_liboctave_error_handler)
		("Array<T>::insert: range error for insert");
	      return *this;
	    }

	  if (dv(i) != (i < len_a ? dva(i) : 1))
	    non_full_dim++;
	}

      if (dva.numel ())
        {
	  if (non_full_dim < 2)
	    {
	      // Special case for fast concatenation
	      const T *a_data = a.data ();
	      octave_idx_type numel_to_move = 1;
	      octave_idx_type skip = 0;
	      for (int i = 0; i < len_a; i++)
		if (ra_idx(i) == 0 && dva(i) == dv(i))
		  numel_to_move *= dva(i);
		else
		  {
		    skip = numel_to_move * (dv(i) - dva(i));
		    numel_to_move *= dva(i);
		    break;
		  }

	      octave_idx_type jidx = ra_idx(n-1);
	      for (int i = n-2; i >= 0; i--)
		{
		  jidx *= dv(i);
		  jidx += ra_idx(i);
		}

	      octave_idx_type iidx = 0;
	      octave_idx_type moves = dva.numel () / numel_to_move;
	      for (octave_idx_type i = 0; i < moves; i++)
		{
		  for (octave_idx_type j = 0; j < numel_to_move; j++)
		    elem (jidx++) = a_data[iidx++];
		  jidx += skip;
		}
	    }
	  else
	    {
	      // Generic code
	      const T *a_data = a.data ();
	      int nel = a.numel ();
	      Array<octave_idx_type> a_idx (n, 0);

	      for (int i = 0; i < nel; i++)
		{
		  int iidx = a_idx(n-1) + ra_idx(n-1);
		  for (int j = n-2; j >= 0; j--)
		    {
		      iidx *= dv(j);
		      iidx += a_idx(j) + ra_idx(j);
		    }

		  elem (iidx) = a_data[i];

		  increment_index (a_idx, dva);
		}
	    }
	}
    }
  else
    (*current_liboctave_error_handler)
      ("Array<T>::insert: invalid indexing operation");

  return *this;
}


template <class T>
Array<T>
Array<T>::transpose (void) const
{
  assert (ndims () == 2);

  octave_idx_type nr = dim1 ();
  octave_idx_type nc = dim2 ();

  if (nr >= 8 && nc >= 8)
    {
      Array<T> result (dim_vector (nc, nr));

      // Blocked transpose to attempt to avoid cache misses.

      // Don't use OCTAVE_LOCAL_BUFFER here as it doesn't work with bool
      // on some compilers.
      T buf[64];
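
      // buf holds one 8x8 tile in column-major order: it is gathered
      // column-by-column from *this and scattered so that each write
      // to the result is also a contiguous 8-element run.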

      octave_idx_type ii = 0, jj;
      for (jj = 0; jj < (nc - 8 + 1); jj += 8)
	{
	  for (ii = 0; ii < (nr - 8 + 1); ii += 8)
	    {
	      // Copy to buffer
	      for (octave_idx_type j = jj, k = 0, idxj = jj * nr; 
		   j < jj + 8; j++, idxj += nr)
		for (octave_idx_type i = ii; i < ii + 8; i++)
		  buf[k++] = xelem (i + idxj);

	      // Copy from buffer
	      for (octave_idx_type i = ii, idxi = ii * nc; i < ii + 8; 
		   i++, idxi += nc)
		for (octave_idx_type j = jj, k = i - ii; j < jj + 8; 
		     j++, k+=8)
		  result.xelem (j + idxi) = buf[k];
	    }

	  if (ii < nr)
	    for (octave_idx_type j = jj; j < jj + 8; j++)
	      for (octave_idx_type i = ii; i < nr; i++)
		result.xelem (j, i) = xelem (i, j);
	} 

      for (octave_idx_type j = jj; j < nc; j++)
	for (octave_idx_type i = 0; i < nr; i++)
	  result.xelem (j, i) = xelem (i, j);

      return result;
    }
  else if (nr > 1 && nc > 1)
    {
      Array<T> result (dim_vector (nc, nr));

      for (octave_idx_type j = 0; j < nc; j++)
	for (octave_idx_type i = 0; i < nr; i++)
	  result.xelem (j, i) = xelem (i, j);

      return result;
    }
  else
    {
      // Fast transpose for vectors and empty matrices.
      return Array<T> (*this, dim_vector (nc, nr));
    }
}

template <class T>
static T
no_op_fcn (const T& x)
{
  return x;
}

template <class T>
Array<T>
Array<T>::hermitian (T (*fcn) (const T&)) const
{
  assert (ndims () == 2);

  if (! fcn)
    fcn = no_op_fcn<T>;

  octave_idx_type nr = dim1 ();
  octave_idx_type nc = dim2 ();

  if (nr >= 8 && nc >= 8)
    {
      Array<T> result (dim_vector (nc, nr));

      // Blocked transpose to attempt to avoid cache misses.

      // Don't use OCTAVE_LOCAL_BUFFER here as it doesn't work with bool
      // on some compilers.
      T buf[64];

      octave_idx_type ii = 0, jj;
      for (jj = 0; jj < (nc - 8 + 1); jj += 8)
	{
	  for (ii = 0; ii < (nr - 8 + 1); ii += 8)
	    {
	      // Copy to buffer
	      for (octave_idx_type j = jj, k = 0, idxj = jj * nr; 
		   j < jj + 8; j++, idxj += nr)
		for (octave_idx_type i = ii; i < ii + 8; i++)
		  buf[k++] = xelem (i + idxj);

	      // Copy from buffer
	      for (octave_idx_type i = ii, idxi = ii * nc; i < ii + 8; 
		   i++, idxi += nc)
		for (octave_idx_type j = jj, k = i - ii; j < jj + 8; 
		     j++, k+=8)
		  result.xelem (j + idxi) = fcn (buf[k]);
	    }

	  if (ii < nr)
	    for (octave_idx_type j = jj; j < jj + 8; j++)
	      for (octave_idx_type i = ii; i < nr; i++)
		result.xelem (j, i) = fcn (xelem (i, j));
	} 

      for (octave_idx_type j = jj; j < nc; j++)
	for (octave_idx_type i = 0; i < nr; i++)
	  result.xelem (j, i) = fcn (xelem (i, j));

      return result;
    }
  else
    {
      Array<T> result (dim_vector (nc, nr));

      for (octave_idx_type j = 0; j < nc; j++)
	for (octave_idx_type i = 0; i < nr; i++)
	  result.xelem (j, i) = fcn (xelem (i, j));

      return result;
    }
}

/*

%% Transpose tests for matrices of the tile size, the tile size plus
%% or minus a row, and with four tiles.

%!shared m7, mt7, m8, mt8, m9, mt9
%! m7 = reshape (1 : 7*8, 8, 7);
%! mt7 = [1:8; 9:16; 17:24; 25:32; 33:40; 41:48; 49:56];
%! m8 = reshape (1 : 8*8, 8, 8);
%! mt8 = [mt7; 57:64];
%! m9 = reshape (1 : 9*8, 8, 9);
%! mt9 = [mt8; 65:72];

%!assert(m7', mt7)
%!assert((1i*m7).', 1i * mt7)
%!assert((1i*m7)', conj (1i * mt7))
%!assert(m8', mt8)
%!assert((1i*m8).', 1i * mt8)
%!assert((1i*m8)', conj (1i * mt8))
%!assert(m9', mt9)
%!assert((1i*m9).', 1i * mt9)
%!assert((1i*m9)', conj (1i * mt9))
%!assert([m7, m8; m7, m8]', [mt7, mt7; mt8, mt8])
%!assert((1i*[m7, m8; m7, m8]).', 1i * [mt7, mt7; mt8, mt8])
%!assert((1i*[m7, m8; m7, m8])', conj (1i * [mt7, mt7; mt8, mt8]))
%!assert([m8, m8; m8, m8]', [mt8, mt8; mt8, mt8])
%!assert((1i*[m8, m8; m8, m8]).', 1i * [mt8, mt8; mt8, mt8])
%!assert((1i*[m8, m8; m8, m8])', conj (1i * [mt8, mt8; mt8, mt8]))
%!assert([m9, m8; m9, m8]', [mt9, mt9; mt8, mt8])
%!assert((1i*[m9, m8; m9, m8]).', 1i * [mt9, mt9; mt8, mt8])
%!assert((1i*[m9, m8; m9, m8])', conj (1i * [mt9, mt9; mt8, mt8]))

*/

template <class T>
T *
Array<T>::fortran_vec (void)
{
  make_unique ();

  return rep->data;
}

template <class T>
void
Array<T>::maybe_delete_dims (void)
{
  int nd = dimensions.length ();

  dim_vector new_dims (1, 1);

  bool delete_dims = true;

  for (int i = nd - 1; i >= 0; i--)
    {
      if (delete_dims)
        {
          if (dimensions(i) != 1)
	    {
	      delete_dims = false;

	      new_dims = dim_vector (i + 1, dimensions(i));
	    }
        }
      else
	new_dims(i) = dimensions(i);
    }

  if (nd != new_dims.length ())
    dimensions = new_dims;
}

template <class T>
bool 
ascending_compare (T a, T b)
{
  return (a < b);
}

template <class T>
bool 
descending_compare (T a, T b)
{
  return (a > b);
}

template <class T>
bool 
ascending_compare (vec_index<T> *a, vec_index<T> *b)
{
  return (a->vec < b->vec);
}

template <class T>
bool 
descending_compare (vec_index<T> *a, vec_index<T> *b)
{
  return (a->vec > b->vec);
}

template <class T>
Array<T>
Array<T>::sort (octave_idx_type dim, sortmode mode) const
{
  Array<T> m = *this;

  dim_vector dv = m.dims ();

  if (m.length () < 1)
    return m;

  octave_idx_type ns = dv(dim);
  octave_idx_type iter = dv.numel () / ns;
  octave_idx_type stride = 1;
  for (int i = 0; i < dim; i++)
    stride *= dv(i);
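
  // There are iter = dv.numel () / ns vectors of length ns along
  // dimension dim; consecutive elements of each such vector are
  // stride apart, and the j-th vector starts at offset
  // (j % stride) + (j / stride) * stride * ns, which is what the
  // offset/offset2 computation below implements.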

  T *v = m.fortran_vec ();
  octave_sort<T> lsort;
  
  if (mode == ASCENDING) 
    lsort.set_compare (ascending_compare);
  else if (mode == DESCENDING)
    lsort.set_compare (descending_compare);
  else
    abort ();

  if (stride == 1)
    {
      for (octave_idx_type j = 0; j < iter; j++)
	{
	  lsort.sort (v, ns);
	  v += ns;
	}
    }
  else
    {
      // Don't use OCTAVE_LOCAL_BUFFER here as it doesn't work with bool
      // on some compilers.
      Array<T> vi (ns);
      T *pvi = vi.fortran_vec ();

      for (octave_idx_type j = 0; j < iter; j++) 
	{
	   octave_idx_type offset = j;
	   octave_idx_type offset2 = 0;
	  while (offset >= stride)
	    {
	      offset -= stride;
	      offset2++;
	    }
	  offset += offset2 * stride * ns;
	  
	  for (octave_idx_type i = 0; i < ns; i++)
	    pvi[i] = v[i*stride + offset];

	  lsort.sort (pvi, ns);
	      
	  for (octave_idx_type i = 0; i < ns; i++)
	    v[i*stride + offset] = pvi[i];
	}
    }

  return m;
}

template <class T>
Array<T>
Array<T>::sort (Array<octave_idx_type> &sidx, octave_idx_type dim, 
		sortmode mode) const
{
  Array<T> m = *this;

  dim_vector dv = m.dims ();

  if (m.length () < 1)
    {
      sidx = Array<octave_idx_type> (dv);
      return m;
    }

  octave_idx_type ns = dv(dim);
  octave_idx_type iter = dv.numel () / ns;
  octave_idx_type stride = 1;
  for (int i = 0; i < dim; i++)
    stride *= dv(i);

  T *v = m.fortran_vec ();
  octave_sort<vec_index<T> *> indexed_sort;

  if (mode == ASCENDING) 
    indexed_sort.set_compare (ascending_compare);
  else if (mode == DESCENDING)
    indexed_sort.set_compare (descending_compare);
  else
    abort ();

  OCTAVE_LOCAL_BUFFER (vec_index<T> *, vi, ns);
  OCTAVE_LOCAL_BUFFER (vec_index<T>, vix, ns);

  for (octave_idx_type i = 0; i < ns; i++)
    vi[i] = &vix[i];

  sidx = Array<octave_idx_type> (dv);
      
  if (stride == 1)
    {
      for (octave_idx_type j = 0; j < iter; j++)
	{
	   octave_idx_type offset = j * ns;

	  for (octave_idx_type i = 0; i < ns; i++)
	    {
	      vi[i]->vec = v[i];
	      vi[i]->indx = i;
	    }

	  indexed_sort.sort (vi, ns);

	  for (octave_idx_type i = 0; i < ns; i++)
	    {
	      v[i] = vi[i]->vec;
	      sidx(i + offset) = vi[i]->indx;
	    }
	  v += ns;
	}
    }
  else
    {
      for (octave_idx_type j = 0; j < iter; j++)
	{
	  octave_idx_type offset = j;
	  octave_idx_type offset2 = 0;
	  while (offset >= stride)
	    {
	      offset -= stride;
	      offset2++;
	    }
	  offset += offset2 * stride * ns;
	      
	  for (octave_idx_type i = 0; i < ns; i++)
	    {
	      vi[i]->vec = v[i*stride + offset];
	      vi[i]->indx = i;
	    }

	  indexed_sort.sort (vi, ns);
	      
	  for (octave_idx_type i = 0; i < ns; i++)
	    {
	      v[i*stride+offset] = vi[i]->vec;
	      sidx(i*stride+offset) = vi[i]->indx;
	    }
	}
    }

  return m;
}

#if defined (HAVE_IEEE754_DATA_FORMAT)

template <>
bool ascending_compare (double, double);
template <>
bool ascending_compare (vec_index<double>*, vec_index<double>*);
template <>
bool descending_compare (double, double);
template <>
bool descending_compare (vec_index<double>*, vec_index<double>*);

template <>
Array<double> Array<double>::sort (octave_idx_type dim, sortmode mode) const;
template <>
Array<double> Array<double>::sort (Array<octave_idx_type> &sidx,
				   octave_idx_type dim, sortmode mode) const;

#endif

template <class T>
Array<T>
Array<T>::diag (octave_idx_type k) const
{
  dim_vector dv = dims ();
  octave_idx_type nd = dv.length ();
  Array<T> d;

  if (nd > 2)
    (*current_liboctave_error_handler) ("Matrix must be 2-dimensional");    
  else
    {
      octave_idx_type nnr = dv (0);
      octave_idx_type nnc = dv (1);

      if (nnr == 0 || nnc == 0)
	; // do nothing
      else if (nnr != 1 && nnc != 1)
	{
	  if (k > 0)
	    nnc -= k;
	  else if (k < 0)
	    nnr += k;

	  if (nnr > 0 && nnc > 0)
	    {
	      octave_idx_type ndiag = (nnr < nnc) ? nnr : nnc;

	      d.resize (dim_vector (ndiag, 1));

	      if (k > 0)
		{
		  for (octave_idx_type i = 0; i < ndiag; i++)
		    d.xelem (i) = elem (i, i+k);
		}
	      else if (k < 0)
		{
		  for (octave_idx_type i = 0; i < ndiag; i++)
		    d.xelem (i) = elem (i-k, i);
		}
	      else
		{
		  for (octave_idx_type i = 0; i < ndiag; i++)
		    d.xelem (i) = elem (i, i);
		}
	    }
	  else
	    (*current_liboctave_error_handler)
	      ("diag: requested diagonal out of range");
	}
      else if (nnr != 0 && nnc != 0)
	{
	  octave_idx_type roff = 0;
	  octave_idx_type coff = 0;
	  if (k > 0)
	    {
	      roff = 0;
	      coff = k;
	    }
	  else if (k < 0)
	    {
	      roff = -k;
	      coff = 0;
	    }

	  if (nnr == 1)
	    {
	      octave_idx_type n = nnc + std::abs (k);
	      d = Array<T> (dim_vector (n, n), resize_fill_value ());

	      for (octave_idx_type i = 0; i < nnc; i++)
		d.xelem (i+roff, i+coff) = elem (0, i);
	    }
	  else
	    {
	      octave_idx_type n = nnr + std::abs (k);
	      d = Array<T> (dim_vector (n, n), resize_fill_value ());

	      for (octave_idx_type i = 0; i < nnr; i++)
		d.xelem (i+roff, i+coff) = elem (i, 0);
	    }
	}
    }

  return d;
}
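
// For example: for a 3x4 matrix A, A.diag (1) returns the 3-element
// superdiagonal as a column vector, while for a 1x3 row vector v,
// v.diag (-1) builds a 4x4 matrix (filled with resize_fill_value ())
// carrying v on its first subdiagonal.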

template <class T>
void
Array<T>::print_info (std::ostream& os, const std::string& prefix) const
{
  os << prefix << "rep address: " << rep << "\n"
     << prefix << "rep->len:    " << rep->len << "\n"
     << prefix << "rep->data:   " << static_cast<void *> (rep->data) << "\n"
     << prefix << "rep->count:  " << rep->count << "\n";

  // 2D info:
  //
  //     << pefix << "rows: " << rows () << "\n"
  //     << prefix << "cols: " << cols () << "\n";
}

/*
;;; Local Variables: ***
;;; mode: C++ ***
;;; End: ***
*/