diff liboctave/Array-f.cc @ 8700:314be237cd5b

sorting optimizations
author Jaroslav Hajek <highegg@gmail.com>
date Mon, 09 Feb 2009 13:05:35 +0100
parents ea8e65ca234f
children 739141cde75a
line wrap: on
line diff
--- a/liboctave/Array-f.cc	Mon Feb 09 01:56:06 2009 -0500
+++ b/liboctave/Array-f.cc	Mon Feb 09 13:05:35 2009 +0100
@@ -27,367 +27,22 @@
 
 // Instantiate Arrays of float values.
 
+#include "lo-ieee.h"
 #include "Array.h"
 #include "Array.cc"
-#include "oct-sort.cc"
 #include "oct-locbuf.h"
 
-#if defined (HAVE_IEEE754_DATA_FORMAT)
-
-static inline uint32_t
-FloatFlip (uint32_t f)
-{
-  uint32_t mask
-    = -static_cast<int32_t>(f >> 31) | 0x80000000UL;
-
-  return f ^ mask;
-}
-
-static inline uint32_t
-IFloatFlip (uint32_t f)
-{
-  uint32_t mask = ((f >> 31) - 1) | 0x80000000UL;
-
-  return f ^ mask;
-}
-
-template <>
-bool
-ascending_compare (float a, float b)
-{
-  return (xisnan (b) || (a < b));
-}
-
-template <>
-bool
-ascending_compare (const float *a, const float *b)
-{
-  return (xisnan (*b) || (*a < *b));
-}
-
-template <>
-bool
-descending_compare (float a, float b)
-{
-  return (xisnan (a) || (a > b));
-}
-
-template <>
-bool
-descending_compare (const float *a, const float *b)
-{
-  return (xisnan (*b) || (*a > *b));
-}
-
-INSTANTIATE_ARRAY_SORT (uint32_t);
+#define INLINE_ASCENDING_SORT
+#define INLINE_DESCENDING_SORT
+#include "oct-sort.cc"
 
 template <>
-Array<float>
-Array<float>::sort (octave_idx_type dim, sortmode mode) const
+inline bool _sort_isnan (float x)
 {
-  Array<float> m (dims ());
-
-  dim_vector dv = m.dims ();
-
-  if (m.length () < 1)
-    return m;
-
-  octave_idx_type ns = dv(dim);
-  octave_idx_type iter = dv.numel () / ns;
-  octave_idx_type stride = 1;
-  for (int i = 0; i < dim; i++)
-    stride *= dv(i);
-
-  float *v = m.fortran_vec ();
-  const float *ov = data ();
-
-  uint32_t *p = reinterpret_cast<uint32_t *> (v);
-  const uint32_t *op = reinterpret_cast<const uint32_t *> (ov);
-
-  octave_sort<uint32_t> lsort;
-
-  if (mode == ASCENDING)
-    lsort.set_compare (ascending_compare);
-  else if (mode == DESCENDING)
-    lsort.set_compare (descending_compare);
-  else
-    abort ();
-
-  if (stride == 1)
-    {
-      for (octave_idx_type j = 0; j < iter; j++)
-	{
-	  // Flip the data in the vector so that int compares on
-	  // IEEE754 give the correct ordering.
-
-	  for (octave_idx_type i = 0; i < ns; i++)
-	    p[i] = FloatFlip (op[i]);
-	      
-	  lsort.sort (p, ns);
-
-	  // Flip the data out of the vector so that int compares
-	  // on IEEE754 give the correct ordering.
-
-	  for (octave_idx_type i = 0; i < ns; i++)
-	    p[i] = IFloatFlip (p[i]);
-
-	  // There are two representations of NaN.  One will be
-	  // sorted to the beginning of the vector and the other
-	  // to the end.  If it will be sorted incorrectly, fix
-	  // things up.
-
-	  if (lo_ieee_signbit (octave_Float_NaN))
-	    {
-	      if (mode == ASCENDING)
-		{
-		  octave_idx_type i = 0;
-		  float *vtmp = reinterpret_cast<float *> (p);
-		  while (xisnan (vtmp[i++]) && i < ns)
-		    /* do nothing */;
-		  for (octave_idx_type l = 0; l < ns - i + 1; l++)
-		    vtmp[l] = vtmp[l+i-1];
-		  for (octave_idx_type l = ns - i + 1; l < ns; l++)
-		    vtmp[l] = octave_Float_NaN;
-		}
-	      else
-		{
-		  octave_idx_type i = ns;
-		  float *vtmp = reinterpret_cast<float *> (p);
-		  while (xisnan (vtmp[--i]) && i > 0)
-		    /* do nothing */;
-		  for (octave_idx_type l = i; l >= 0; l--)
-		    vtmp[l-i+ns-1] = vtmp[l];
-		  for (octave_idx_type l = 0; l < ns - i - 1; l++)
-		    vtmp[l] = octave_Float_NaN;
-		}
-	    }
-
-	  p += ns;
-          op += ns;
-	}
-    }
-  else
-    {
-      OCTAVE_LOCAL_BUFFER (uint32_t, vi, ns);
-
-      for (octave_idx_type j = 0; j < iter; j++)
-	{
-	  octave_idx_type offset = j;
-	  octave_idx_type offset2 = 0;
-	  while (offset >= stride)
-	    {
-	      offset -= stride;
-	      offset2++;
-	    }
-	  offset += offset2 * stride * ns;
-
-	  // Flip the data in the vector so that int compares on
-	  // IEEE754 give the correct ordering.
-
-	  for (octave_idx_type i = 0; i < ns; i++)
-	    vi[i] = FloatFlip (op[i*stride + offset]);
-
-	  lsort.sort (vi, ns);
-
-	  // Flip the data out of the vector so that int compares
-	  // on IEEE754 give the correct ordering.
-
-	  for (octave_idx_type i = 0; i < ns; i++)
-	    p[i*stride + offset] = IFloatFlip (vi[i]);
-	      
-	  // There are two representations of NaN. One will be
-	  // sorted to the beginning of the vector and the other
-	  // to the end. If it will be sorted to the beginning,
-	  // fix things up.
-
-	  if (lo_ieee_signbit (octave_Float_NaN))
-	    {
-	      if (mode == ASCENDING)
-		{
-		   octave_idx_type i = 0;
-		  while (xisnan (v[i++*stride + offset]) && i < ns)
-		    /* do nothing */;
-		  for (octave_idx_type l = 0; l < ns - i + 1; l++)
-		    v[l*stride + offset] = v[(l+i-1)*stride + offset];
-		  for (octave_idx_type l = ns - i + 1; l < ns; l++)
-		    v[l*stride + offset] = octave_Float_NaN;
-		}
-	      else
-		{
-		   octave_idx_type i = ns;
-		  while (xisnan (v[--i*stride + offset]) && i > 0)
-		    /* do nothing */;
-		  for (octave_idx_type l = i; l >= 0; l--)
-		    v[(l-i+ns-1)*stride + offset] = v[l*stride + offset];
-		  for (octave_idx_type l = 0; l < ns - i - 1; l++)
-		    v[l*stride + offset] = octave_Float_NaN;
-		}
-	    }
-	}
-    }
-
-  return m;
+  return lo_ieee_isnan (x);
 }
 
-template <>
-Array<float>
-Array<float>::sort (Array<octave_idx_type> &sidx, octave_idx_type dim, 
-		     sortmode mode) const
-{
-  Array<float> m (dims ());
-
-  dim_vector dv = m.dims ();
-
-  if (m.length () < 1)
-    {
-      sidx = Array<octave_idx_type> (dv);
-      return m;
-    }
-
-  octave_idx_type ns = dv(dim);
-  octave_idx_type iter = dv.numel () / ns;
-  octave_idx_type stride = 1;
-  for (int i = 0; i < dim; i++)
-    stride *= dv(i);
-
-  float *v = m.fortran_vec ();
-  const float *ov = data ();
-
-  uint32_t *p = reinterpret_cast<uint32_t *> (v);
-  const uint32_t *op = reinterpret_cast<const uint32_t *> (ov);
-
-  octave_sort<const uint32_t *> indexed_sort;
-
-  if (mode == ASCENDING)
-    indexed_sort.set_compare (ascending_compare);
-  else if (mode == DESCENDING)
-    indexed_sort.set_compare (descending_compare);
-  else
-    abort ();
-
-  OCTAVE_LOCAL_BUFFER (const uint32_t *, vi, ns);
-  OCTAVE_LOCAL_BUFFER (uint32_t, vix, ns);
-  
-  sidx = Array<octave_idx_type> (dv);
-      
-  for (octave_idx_type j = 0; j < iter; j++)
-    {
-      octave_idx_type offset = j;
-      octave_idx_type offset2 = 0;
-      while (offset >= stride)
-	{
-	  offset -= stride;
-	  offset2++;
-	}
-      offset += offset2 * stride * ns;
-
-      // Flip the data in the vector so that int compares on
-      // IEEE754 give the correct ordering.
-
-      for (octave_idx_type i = 0; i < ns; i++)
-	{
-	  vix[i] = FloatFlip (op[i*stride + offset]);
-	  vi[i] = vix + i;
-	}
-
-      indexed_sort.sort (vi, ns);
-
-      // Flip the data out of the vector so that int compares on
-      // IEEE754 give the correct ordering
-
-      for (octave_idx_type i = 0; i < ns; i++)
-	{
-	  p[i*stride + offset] = IFloatFlip (*vi[i]);
-	  sidx(i*stride + offset) = vi[i] - vix;
-	}
-
-      // There are two representations of NaN.  One will be sorted
-      // to the beginning of the vector and the other to the end.
-      // If it will be sorted to the beginning, fix things up.
-
-      if (lo_ieee_signbit (octave_Float_NaN))
-	{
-	  if (mode == ASCENDING)
-	    {
-	      octave_idx_type i = 0;
-	      while (xisnan (v[i++*stride+offset]) && i < ns)
-		/* do nothing */;
-	      OCTAVE_LOCAL_BUFFER (float, itmp, i - 1);
-	      for (octave_idx_type l = 0; l < i -1; l++)
-		itmp[l] = sidx(l*stride + offset);
-	      for (octave_idx_type l = 0; l < ns - i + 1; l++)
-		{
-		  v[l*stride + offset] = v[(l+i-1)*stride + offset];
-		  sidx(l*stride + offset) = sidx((l+i-1)*stride + offset);
-		}
-	      for (octave_idx_type k = 0, l = ns - i + 1; l < ns; l++, k++)
-		{
-		  v[l*stride + offset] = octave_Float_NaN;
-		  sidx(l*stride + offset) = 
-		    static_cast<octave_idx_type>(itmp[k]);
-		}
-	    }
-	  else 
-	    {
-	      octave_idx_type i = ns;
-	      while (xisnan (v[--i*stride+offset]) && i > 0)
-		/* do nothing */;
-	      OCTAVE_LOCAL_BUFFER (float, itmp, ns - i - 1);
-	      for (octave_idx_type l = 0; l < ns - i -1; l++)
-		itmp[l] = sidx((l+i+1)*stride + offset);
-	      for (octave_idx_type l = i; l >= 0; l--)
-		{
-		  v[(l-i+ns-1)*stride + offset] = v[l*stride + offset];
-		  sidx((l-i+ns-1)*stride + offset) = sidx(l*stride + offset);
-		}
-	      for (octave_idx_type k = 0, l = 0; l < ns - i - 1; l++, k++)
-		{
-		  v[l*stride + offset] = octave_Float_NaN;
-		  sidx(l*stride + offset) = 
-		    static_cast<octave_idx_type>(itmp[k]);
-		}
-	    }
-	}
-    }
-
-  return m;
-}
-
-#else
-
-template <>
-bool
-ascending_compare (float a, float b)
-{
-  return (xisnan (b) || (a < b));
-}
-
-template <>
-bool
-ascending_compare (const float *a, 
-		   const float *b)
-{
-  return (xisnan (*b) || (*a < *b));
-}
-
-template <>
-bool
-descending_compare (float a, float b)
-{
-  return (xisnan (a) || (a > b));
-}
-
-template <>
-bool
-descending_compare (const float *a, 
-		    const float *b)
-{
-  return (xisnan (*b) || (*a > *b));
-}
-
-INSTANTIATE_ARRAY_SORT (float);
-
-#endif
+INSTANTIATE_ARRAY_SORT (double);
 
 INSTANTIATE_ARRAY_AND_ASSIGN (float, OCTAVE_API);