# HG changeset patch # User Jaroslav Hajek # Date 1234181135 -3600 # Node ID 314be237cd5b716245f5c2ffb4d6297b777d6bb0 # Parent 6e764b7317bd843583be6660d19f236d2a269113 sorting optimizations diff -r 6e764b7317bd -r 314be237cd5b liboctave/Array-C.cc --- a/liboctave/Array-C.cc Mon Feb 09 01:56:06 2009 -0500 +++ b/liboctave/Array-C.cc Mon Feb 09 13:05:35 2009 +0100 @@ -31,6 +31,7 @@ #include "Array.h" #include "Array.cc" +#include "oct-sort.cc" static double xabs (const Complex& x) @@ -38,27 +39,9 @@ return (xisinf (x.real ()) || xisinf (x.imag ())) ? octave_Inf : abs (x); } -static bool -operator < (const Complex& a, const Complex& b) -{ - return (xisnan (b) || (xabs (a) < xabs (b)) - || ((xabs (a) == xabs (b)) && (arg (a) < arg (b)))); -} - -static bool -operator > (const Complex& a, const Complex& b) -{ - return (xisnan (a) || (xabs (a) > xabs (b)) - || ((xabs (a) == xabs (b)) && (arg (a) > arg (b)))); -} - -// This file must be included after the < and > operators are -// defined to avoid errors with the Intel C++ compiler. -#include "oct-sort.cc" - template <> bool -ascending_compare (Complex a, Complex b) +octave_sort::ascending_compare (Complex a, Complex b) { return (xisnan (b) || (xabs (a) < xabs (b)) || ((xabs (a) == xabs (b)) && (arg (a) < arg (b)))); @@ -66,32 +49,12 @@ template <> bool -ascending_compare (vec_index *a, vec_index *b) -{ - return (xisnan (b->vec) - || (xabs (a->vec) < xabs (b->vec)) - || ((xabs (a->vec) == xabs (b->vec)) - && (arg (a->vec) < arg (b->vec)))); -} - -template <> -bool -descending_compare (Complex a, Complex b) +octave_sort::descending_compare (Complex a, Complex b) { return (xisnan (a) || (xabs (a) > xabs (b)) || ((xabs (a) == xabs (b)) && (arg (a) > arg (b)))); } -template <> -bool -descending_compare (vec_index *a, vec_index *b) -{ - return (xisnan (a->vec) - || (xabs (a->vec) > xabs (b->vec)) - || ((xabs (a->vec) == xabs (b->vec)) - && (arg (a->vec) > arg (b->vec)))); -} - INSTANTIATE_ARRAY_SORT (Complex); INSTANTIATE_ARRAY_AND_ASSIGN (Complex, OCTAVE_API); diff -r 6e764b7317bd -r 314be237cd5b liboctave/Array-d.cc --- a/liboctave/Array-d.cc Mon Feb 09 01:56:06 2009 -0500 +++ b/liboctave/Array-d.cc Mon Feb 09 13:05:35 2009 +0100 @@ -27,368 +27,23 @@ // Instantiate Arrays of double values. +#include "lo-ieee.h" #include "Array.h" #include "Array.cc" -#include "oct-sort.cc" #include "oct-locbuf.h" -#if defined (HAVE_IEEE754_DATA_FORMAT) - -static inline uint64_t -FloatFlip (uint64_t f) -{ - uint64_t mask - = -static_cast(f >> 63) | 0x8000000000000000ULL; - - return f ^ mask; -} - -static inline uint64_t -IFloatFlip (uint64_t f) -{ - uint64_t mask = ((f >> 63) - 1) | 0x8000000000000000ULL; - - return f ^ mask; -} - -template <> -bool -ascending_compare (double a, double b) -{ - return (xisnan (b) || (a < b)); -} - -template <> -bool -ascending_compare (const double *a, const double *b) -{ - return (xisnan (*b) || (*a < *b)); -} - -template <> -bool -descending_compare (double a, double b) -{ - return (xisnan (a) || (a > b)); -} - -template <> -bool -descending_compare (const double *a, const double *b) -{ - return (xisnan (*b) || (*a > *b)); -} - -INSTANTIATE_ARRAY_SORT (uint64_t); +#define INLINE_ASCENDING_SORT +#define INLINE_DESCENDING_SORT +#include "oct-sort.cc" template <> -Array -Array::sort (octave_idx_type dim, sortmode mode) const +inline bool _sort_isnan (double x) { - Array m (dims ()); - - dim_vector dv = m.dims (); - - if (m.length () < 1) - return m; - - octave_idx_type ns = dv(dim); - octave_idx_type iter = dv.numel () / ns; - octave_idx_type stride = 1; - for (int i = 0; i < dim; i++) - stride *= dv(i); - - double *v = m.fortran_vec (); - const double *ov = data (); - - uint64_t *p = reinterpret_cast (v); - const uint64_t *op = reinterpret_cast (ov); - - octave_sort lsort; - - if (mode == ASCENDING) - lsort.set_compare (ascending_compare); - else if (mode == DESCENDING) - lsort.set_compare (descending_compare); - else - abort (); - - if (stride == 1) - { - for (octave_idx_type j = 0; j < iter; j++) - { - // Flip the data in the vector so that int compares on - // IEEE754 give the correct ordering. - - for (octave_idx_type i = 0; i < ns; i++) - p[i] = FloatFlip (op[i]); - - lsort.sort (p, ns); - - // Flip the data out of the vector so that int compares - // on IEEE754 give the correct ordering. - - for (octave_idx_type i = 0; i < ns; i++) - p[i] = IFloatFlip (p[i]); - - // There are two representations of NaN. One will be - // sorted to the beginning of the vector and the other - // to the end. If it will be sorted incorrectly, fix - // things up. - - if (lo_ieee_signbit (octave_NaN)) - { - if (mode == ASCENDING) - { - octave_idx_type i = 0; - double *vtmp = reinterpret_cast (p); - while (xisnan (vtmp[i++]) && i < ns) - /* do nothing */; - for (octave_idx_type l = 0; l < ns - i + 1; l++) - vtmp[l] = vtmp[l+i-1]; - for (octave_idx_type l = ns - i + 1; l < ns; l++) - vtmp[l] = octave_NaN; - } - else - { - octave_idx_type i = ns; - double *vtmp = reinterpret_cast (p); - while (xisnan (vtmp[--i]) && i > 0) - /* do nothing */; - for (octave_idx_type l = i; l >= 0; l--) - vtmp[l-i+ns-1] = vtmp[l]; - for (octave_idx_type l = 0; l < ns - i - 1; l++) - vtmp[l] = octave_NaN; - } - } - - p += ns; - op += ns; - } - } - else - { - OCTAVE_LOCAL_BUFFER (uint64_t, vi, ns); - - for (octave_idx_type j = 0; j < iter; j++) - { - octave_idx_type offset = j; - octave_idx_type offset2 = 0; - while (offset >= stride) - { - offset -= stride; - offset2++; - } - offset += offset2 * stride * ns; - - // Flip the data in the vector so that int compares on - // IEEE754 give the correct ordering. - - for (octave_idx_type i = 0; i < ns; i++) - vi[i] = FloatFlip (op[i*stride + offset]); - - lsort.sort (vi, ns); - - // Flip the data out of the vector so that int compares - // on IEEE754 give the correct ordering. - - for (octave_idx_type i = 0; i < ns; i++) - p[i*stride + offset] = IFloatFlip (vi[i]); - - // There are two representations of NaN. One will be - // sorted to the beginning of the vector and the other - // to the end. If it will be sorted to the beginning, - // fix things up. - - if (lo_ieee_signbit (octave_NaN)) - { - if (mode == ASCENDING) - { - octave_idx_type i = 0; - while (xisnan (v[i++*stride + offset]) && i < ns) - /* do nothing */; - for (octave_idx_type l = 0; l < ns - i + 1; l++) - v[l*stride + offset] = v[(l+i-1)*stride + offset]; - for (octave_idx_type l = ns - i + 1; l < ns; l++) - v[l*stride + offset] = octave_NaN; - } - else - { - octave_idx_type i = ns; - while (xisnan (v[--i*stride + offset]) && i > 0) - /* do nothing */; - for (octave_idx_type l = i; l >= 0; l--) - v[(l-i+ns-1)*stride + offset] = v[l*stride + offset]; - for (octave_idx_type l = 0; l < ns - i - 1; l++) - v[l*stride + offset] = octave_NaN; - } - } - } - } - - return m; -} - -template <> -Array -Array::sort (Array &sidx, octave_idx_type dim, - sortmode mode) const -{ - Array m (dims ()); - - dim_vector dv = m.dims (); - - if (m.length () < 1) - { - sidx = Array (dv); - return m; - } - - octave_idx_type ns = dv(dim); - octave_idx_type iter = dv.numel () / ns; - octave_idx_type stride = 1; - for (int i = 0; i < dim; i++) - stride *= dv(i); - - double *v = m.fortran_vec (); - const double *ov = data (); - - uint64_t *p = reinterpret_cast (v); - const uint64_t *op = reinterpret_cast (ov); - - octave_sort indexed_sort; - - if (mode == ASCENDING) - indexed_sort.set_compare (ascending_compare); - else if (mode == DESCENDING) - indexed_sort.set_compare (descending_compare); - else - abort (); - - OCTAVE_LOCAL_BUFFER (const uint64_t *, vi, ns); - OCTAVE_LOCAL_BUFFER (uint64_t, vix, ns); - - sidx = Array (dv); - - for (octave_idx_type j = 0; j < iter; j++) - { - octave_idx_type offset = j; - octave_idx_type offset2 = 0; - while (offset >= stride) - { - offset -= stride; - offset2++; - } - offset += offset2 * stride * ns; - - // Flip the data in the vector so that int compares on - // IEEE754 give the correct ordering. - - for (octave_idx_type i = 0; i < ns; i++) - { - vix[i] = FloatFlip (op[i*stride + offset]); - vi[i] = vix + i; - } - - indexed_sort.sort (vi, ns); - - // Flip the data out of the vector so that int compares on - // IEEE754 give the correct ordering - - for (octave_idx_type i = 0; i < ns; i++) - { - p[i*stride + offset] = IFloatFlip (*vi[i]); - sidx(i*stride + offset) = vi[i] - vix; - } - - // There are two representations of NaN. One will be sorted - // to the beginning of the vector and the other to the end. - // If it will be sorted to the beginning, fix things up. - - if (lo_ieee_signbit (octave_NaN)) - { - if (mode == ASCENDING) - { - octave_idx_type i = 0; - while (xisnan (v[i++*stride+offset]) && i < ns) - /* do nothing */; - OCTAVE_LOCAL_BUFFER (double, itmp, i - 1); - for (octave_idx_type l = 0; l < i -1; l++) - itmp[l] = sidx(l*stride + offset); - for (octave_idx_type l = 0; l < ns - i + 1; l++) - { - v[l*stride + offset] = v[(l+i-1)*stride + offset]; - sidx(l*stride + offset) = sidx((l+i-1)*stride + offset); - } - for (octave_idx_type k = 0, l = ns - i + 1; l < ns; l++, k++) - { - v[l*stride + offset] = octave_NaN; - sidx(l*stride + offset) = - static_cast(itmp[k]); - } - } - else - { - octave_idx_type i = ns; - while (xisnan (v[--i*stride+offset]) && i > 0) - /* do nothing */; - OCTAVE_LOCAL_BUFFER (double, itmp, ns - i - 1); - for (octave_idx_type l = 0; l < ns - i -1; l++) - itmp[l] = sidx((l+i+1)*stride + offset); - for (octave_idx_type l = i; l >= 0; l--) - { - v[(l-i+ns-1)*stride + offset] = v[l*stride + offset]; - sidx((l-i+ns-1)*stride + offset) = sidx(l*stride + offset); - } - for (octave_idx_type k = 0, l = 0; l < ns - i - 1; l++, k++) - { - v[l*stride + offset] = octave_NaN; - sidx(l*stride + offset) = - static_cast(itmp[k]); - } - } - } - } - - return m; -} - -#else - -template <> -bool -ascending_compare (double a, double b) -{ - return (xisnan (b) || (a < b)); -} - -template <> -bool -ascending_compare (const double *a, - const double *b) -{ - return (xisnan (*b) || (*a < *b)); -} - -template <> -bool -descending_compare (double a, double b) -{ - return (xisnan (a) || (a > b)); -} - -template <> -bool -descending_compare (const double *a, - const double *b) -{ - return (xisnan (*b) || (*a > *b)); + return lo_ieee_isnan (x); } INSTANTIATE_ARRAY_SORT (double); -#endif - INSTANTIATE_ARRAY_AND_ASSIGN (double, OCTAVE_API); INSTANTIATE_ARRAY_ASSIGN (double, int, OCTAVE_API) diff -r 6e764b7317bd -r 314be237cd5b liboctave/Array-f.cc --- a/liboctave/Array-f.cc Mon Feb 09 01:56:06 2009 -0500 +++ b/liboctave/Array-f.cc Mon Feb 09 13:05:35 2009 +0100 @@ -27,367 +27,22 @@ // Instantiate Arrays of float values. +#include "lo-ieee.h" #include "Array.h" #include "Array.cc" -#include "oct-sort.cc" #include "oct-locbuf.h" -#if defined (HAVE_IEEE754_DATA_FORMAT) - -static inline uint32_t -FloatFlip (uint32_t f) -{ - uint32_t mask - = -static_cast(f >> 31) | 0x80000000UL; - - return f ^ mask; -} - -static inline uint32_t -IFloatFlip (uint32_t f) -{ - uint32_t mask = ((f >> 31) - 1) | 0x80000000UL; - - return f ^ mask; -} - -template <> -bool -ascending_compare (float a, float b) -{ - return (xisnan (b) || (a < b)); -} - -template <> -bool -ascending_compare (const float *a, const float *b) -{ - return (xisnan (*b) || (*a < *b)); -} - -template <> -bool -descending_compare (float a, float b) -{ - return (xisnan (a) || (a > b)); -} - -template <> -bool -descending_compare (const float *a, const float *b) -{ - return (xisnan (*b) || (*a > *b)); -} - -INSTANTIATE_ARRAY_SORT (uint32_t); +#define INLINE_ASCENDING_SORT +#define INLINE_DESCENDING_SORT +#include "oct-sort.cc" template <> -Array -Array::sort (octave_idx_type dim, sortmode mode) const +inline bool _sort_isnan (float x) { - Array m (dims ()); - - dim_vector dv = m.dims (); - - if (m.length () < 1) - return m; - - octave_idx_type ns = dv(dim); - octave_idx_type iter = dv.numel () / ns; - octave_idx_type stride = 1; - for (int i = 0; i < dim; i++) - stride *= dv(i); - - float *v = m.fortran_vec (); - const float *ov = data (); - - uint32_t *p = reinterpret_cast (v); - const uint32_t *op = reinterpret_cast (ov); - - octave_sort lsort; - - if (mode == ASCENDING) - lsort.set_compare (ascending_compare); - else if (mode == DESCENDING) - lsort.set_compare (descending_compare); - else - abort (); - - if (stride == 1) - { - for (octave_idx_type j = 0; j < iter; j++) - { - // Flip the data in the vector so that int compares on - // IEEE754 give the correct ordering. - - for (octave_idx_type i = 0; i < ns; i++) - p[i] = FloatFlip (op[i]); - - lsort.sort (p, ns); - - // Flip the data out of the vector so that int compares - // on IEEE754 give the correct ordering. - - for (octave_idx_type i = 0; i < ns; i++) - p[i] = IFloatFlip (p[i]); - - // There are two representations of NaN. One will be - // sorted to the beginning of the vector and the other - // to the end. If it will be sorted incorrectly, fix - // things up. - - if (lo_ieee_signbit (octave_Float_NaN)) - { - if (mode == ASCENDING) - { - octave_idx_type i = 0; - float *vtmp = reinterpret_cast (p); - while (xisnan (vtmp[i++]) && i < ns) - /* do nothing */; - for (octave_idx_type l = 0; l < ns - i + 1; l++) - vtmp[l] = vtmp[l+i-1]; - for (octave_idx_type l = ns - i + 1; l < ns; l++) - vtmp[l] = octave_Float_NaN; - } - else - { - octave_idx_type i = ns; - float *vtmp = reinterpret_cast (p); - while (xisnan (vtmp[--i]) && i > 0) - /* do nothing */; - for (octave_idx_type l = i; l >= 0; l--) - vtmp[l-i+ns-1] = vtmp[l]; - for (octave_idx_type l = 0; l < ns - i - 1; l++) - vtmp[l] = octave_Float_NaN; - } - } - - p += ns; - op += ns; - } - } - else - { - OCTAVE_LOCAL_BUFFER (uint32_t, vi, ns); - - for (octave_idx_type j = 0; j < iter; j++) - { - octave_idx_type offset = j; - octave_idx_type offset2 = 0; - while (offset >= stride) - { - offset -= stride; - offset2++; - } - offset += offset2 * stride * ns; - - // Flip the data in the vector so that int compares on - // IEEE754 give the correct ordering. - - for (octave_idx_type i = 0; i < ns; i++) - vi[i] = FloatFlip (op[i*stride + offset]); - - lsort.sort (vi, ns); - - // Flip the data out of the vector so that int compares - // on IEEE754 give the correct ordering. - - for (octave_idx_type i = 0; i < ns; i++) - p[i*stride + offset] = IFloatFlip (vi[i]); - - // There are two representations of NaN. One will be - // sorted to the beginning of the vector and the other - // to the end. If it will be sorted to the beginning, - // fix things up. - - if (lo_ieee_signbit (octave_Float_NaN)) - { - if (mode == ASCENDING) - { - octave_idx_type i = 0; - while (xisnan (v[i++*stride + offset]) && i < ns) - /* do nothing */; - for (octave_idx_type l = 0; l < ns - i + 1; l++) - v[l*stride + offset] = v[(l+i-1)*stride + offset]; - for (octave_idx_type l = ns - i + 1; l < ns; l++) - v[l*stride + offset] = octave_Float_NaN; - } - else - { - octave_idx_type i = ns; - while (xisnan (v[--i*stride + offset]) && i > 0) - /* do nothing */; - for (octave_idx_type l = i; l >= 0; l--) - v[(l-i+ns-1)*stride + offset] = v[l*stride + offset]; - for (octave_idx_type l = 0; l < ns - i - 1; l++) - v[l*stride + offset] = octave_Float_NaN; - } - } - } - } - - return m; + return lo_ieee_isnan (x); } -template <> -Array -Array::sort (Array &sidx, octave_idx_type dim, - sortmode mode) const -{ - Array m (dims ()); - - dim_vector dv = m.dims (); - - if (m.length () < 1) - { - sidx = Array (dv); - return m; - } - - octave_idx_type ns = dv(dim); - octave_idx_type iter = dv.numel () / ns; - octave_idx_type stride = 1; - for (int i = 0; i < dim; i++) - stride *= dv(i); - - float *v = m.fortran_vec (); - const float *ov = data (); - - uint32_t *p = reinterpret_cast (v); - const uint32_t *op = reinterpret_cast (ov); - - octave_sort indexed_sort; - - if (mode == ASCENDING) - indexed_sort.set_compare (ascending_compare); - else if (mode == DESCENDING) - indexed_sort.set_compare (descending_compare); - else - abort (); - - OCTAVE_LOCAL_BUFFER (const uint32_t *, vi, ns); - OCTAVE_LOCAL_BUFFER (uint32_t, vix, ns); - - sidx = Array (dv); - - for (octave_idx_type j = 0; j < iter; j++) - { - octave_idx_type offset = j; - octave_idx_type offset2 = 0; - while (offset >= stride) - { - offset -= stride; - offset2++; - } - offset += offset2 * stride * ns; - - // Flip the data in the vector so that int compares on - // IEEE754 give the correct ordering. - - for (octave_idx_type i = 0; i < ns; i++) - { - vix[i] = FloatFlip (op[i*stride + offset]); - vi[i] = vix + i; - } - - indexed_sort.sort (vi, ns); - - // Flip the data out of the vector so that int compares on - // IEEE754 give the correct ordering - - for (octave_idx_type i = 0; i < ns; i++) - { - p[i*stride + offset] = IFloatFlip (*vi[i]); - sidx(i*stride + offset) = vi[i] - vix; - } - - // There are two representations of NaN. One will be sorted - // to the beginning of the vector and the other to the end. - // If it will be sorted to the beginning, fix things up. - - if (lo_ieee_signbit (octave_Float_NaN)) - { - if (mode == ASCENDING) - { - octave_idx_type i = 0; - while (xisnan (v[i++*stride+offset]) && i < ns) - /* do nothing */; - OCTAVE_LOCAL_BUFFER (float, itmp, i - 1); - for (octave_idx_type l = 0; l < i -1; l++) - itmp[l] = sidx(l*stride + offset); - for (octave_idx_type l = 0; l < ns - i + 1; l++) - { - v[l*stride + offset] = v[(l+i-1)*stride + offset]; - sidx(l*stride + offset) = sidx((l+i-1)*stride + offset); - } - for (octave_idx_type k = 0, l = ns - i + 1; l < ns; l++, k++) - { - v[l*stride + offset] = octave_Float_NaN; - sidx(l*stride + offset) = - static_cast(itmp[k]); - } - } - else - { - octave_idx_type i = ns; - while (xisnan (v[--i*stride+offset]) && i > 0) - /* do nothing */; - OCTAVE_LOCAL_BUFFER (float, itmp, ns - i - 1); - for (octave_idx_type l = 0; l < ns - i -1; l++) - itmp[l] = sidx((l+i+1)*stride + offset); - for (octave_idx_type l = i; l >= 0; l--) - { - v[(l-i+ns-1)*stride + offset] = v[l*stride + offset]; - sidx((l-i+ns-1)*stride + offset) = sidx(l*stride + offset); - } - for (octave_idx_type k = 0, l = 0; l < ns - i - 1; l++, k++) - { - v[l*stride + offset] = octave_Float_NaN; - sidx(l*stride + offset) = - static_cast(itmp[k]); - } - } - } - } - - return m; -} - -#else - -template <> -bool -ascending_compare (float a, float b) -{ - return (xisnan (b) || (a < b)); -} - -template <> -bool -ascending_compare (const float *a, - const float *b) -{ - return (xisnan (*b) || (*a < *b)); -} - -template <> -bool -descending_compare (float a, float b) -{ - return (xisnan (a) || (a > b)); -} - -template <> -bool -descending_compare (const float *a, - const float *b) -{ - return (xisnan (*b) || (*a > *b)); -} - -INSTANTIATE_ARRAY_SORT (float); - -#endif +INSTANTIATE_ARRAY_SORT (double); INSTANTIATE_ARRAY_AND_ASSIGN (float, OCTAVE_API); diff -r 6e764b7317bd -r 314be237cd5b liboctave/Array-fC.cc --- a/liboctave/Array-fC.cc Mon Feb 09 01:56:06 2009 -0500 +++ b/liboctave/Array-fC.cc Mon Feb 09 13:05:35 2009 +0100 @@ -31,6 +31,7 @@ #include "Array.h" #include "Array.cc" +#include "oct-sort.cc" static float xabs (const FloatComplex& x) @@ -38,27 +39,9 @@ return (xisinf (x.real ()) || xisinf (x.imag ())) ? octave_Float_Inf : abs (x); } -static bool -operator < (const FloatComplex& a, const FloatComplex& b) -{ - return (xisnan (b) || (xabs (a) < xabs (b)) - || ((xabs (a) == xabs (b)) && (arg (a) < arg (b)))); -} - -static bool -operator > (const FloatComplex& a, const FloatComplex& b) -{ - return (xisnan (a) || (xabs (a) > xabs (b)) - || ((xabs (a) == xabs (b)) && (arg (a) > arg (b)))); -} - -// This file must be included after the < and > operators are -// defined to avoid errors with the Intel C++ compiler. -#include "oct-sort.cc" - template <> bool -ascending_compare (FloatComplex a, FloatComplex b) +octave_sort::ascending_compare (FloatComplex a, FloatComplex b) { return (xisnan (b) || (xabs (a) < xabs (b)) || ((xabs (a) == xabs (b)) && (arg (a) < arg (b)))); @@ -66,32 +49,12 @@ template <> bool -ascending_compare (vec_index *a, vec_index *b) -{ - return (xisnan (b->vec) - || (xabs (a->vec) < xabs (b->vec)) - || ((xabs (a->vec) == xabs (b->vec)) - && (arg (a->vec) < arg (b->vec)))); -} - -template <> -bool -descending_compare (FloatComplex a, FloatComplex b) +octave_sort::descending_compare (FloatComplex a, FloatComplex b) { return (xisnan (a) || (xabs (a) > xabs (b)) || ((xabs (a) == xabs (b)) && (arg (a) > arg (b)))); } -template <> -bool -descending_compare (vec_index *a, vec_index *b) -{ - return (xisnan (a->vec) - || (xabs (a->vec) > xabs (b->vec)) - || ((xabs (a->vec) == xabs (b->vec)) - && (arg (a->vec) > arg (b->vec)))); -} - INSTANTIATE_ARRAY_SORT (FloatComplex); INSTANTIATE_ARRAY_AND_ASSIGN (FloatComplex, OCTAVE_API); diff -r 6e764b7317bd -r 314be237cd5b liboctave/Array-i.cc --- a/liboctave/Array-i.cc Mon Feb 09 01:56:06 2009 -0500 +++ b/liboctave/Array-i.cc Mon Feb 09 13:05:35 2009 +0100 @@ -31,6 +31,9 @@ #include "Array.h" #include "Array.cc" + +#define INLINE_ASCENDING_SORT +#define INLINE_DESCENDING_SORT #include "oct-sort.cc" INSTANTIATE_ARRAY_SORT (int); diff -r 6e764b7317bd -r 314be237cd5b liboctave/Array-s.cc --- a/liboctave/Array-s.cc Mon Feb 09 01:56:06 2009 -0500 +++ b/liboctave/Array-s.cc Mon Feb 09 13:05:35 2009 +0100 @@ -29,6 +29,9 @@ #include "Array.h" #include "Array.cc" + +#define INLINE_ASCENDING_SORT +#define INLINE_DESCENDING_SORT #include "oct-sort.cc" INSTANTIATE_ARRAY_SORT (short); diff -r 6e764b7317bd -r 314be237cd5b liboctave/Array.cc --- a/liboctave/Array.cc Mon Feb 09 01:56:06 2009 -0500 +++ b/liboctave/Array.cc Mon Feb 09 13:05:35 2009 +0100 @@ -1975,32 +1975,12 @@ dimensions = new_dims; } +// Non-real types don't have NaNs. template -bool -ascending_compare (T a, T b) -{ - return (a < b); -} - -template -bool -descending_compare (T a, T b) +inline +bool _sort_isnan (T) { - return (a > b); -} - -template -bool -ascending_compare (const T *a, const T *b) -{ - return (*a < *b); -} - -template -bool -descending_compare (const T *a, const T *b) -{ - return (*a > *b); + return false; } template @@ -2027,9 +2007,9 @@ octave_sort lsort; if (mode == ASCENDING) - lsort.set_compare (ascending_compare); + lsort.set_compare (octave_sort::ascending_compare); else if (mode == DESCENDING) - lsort.set_compare (descending_compare); + lsort.set_compare (octave_sort::descending_compare); else abort (); @@ -2037,15 +2017,36 @@ { for (octave_idx_type j = 0; j < iter; j++) { - std::copy (ov, ov + ns, v); - lsort.sort (v, ns); + // copy and partition out NaNs. + // FIXME: impact on integer types noticeable? + octave_idx_type kl = 0, ku = ns; + for (octave_idx_type i = 0; i < ns; i++) + { + T tmp = ov[i]; + if (_sort_isnan (tmp)) + v[--ku] = tmp; + else + v[kl++] = tmp; + } + + // sort. + lsort.sort (v, kl); + + if (ku < ns) + { + // NaNs are in reverse order + std::reverse (v + ku, v + ns); + if (mode == DESCENDING) + std::rotate (v, v + ku, v + ns); + } + v += ns; ov += ns; } } else { - OCTAVE_LOCAL_BUFFER (T, pvi, ns); + OCTAVE_LOCAL_BUFFER (T, buf, ns); for (octave_idx_type j = 0; j < iter; j++) { @@ -2060,13 +2061,32 @@ offset += offset2 * stride * ns; - for (octave_idx_type i = 0; i < ns; i++) - pvi[i] = ov[i*stride + offset]; + // gather and partition out NaNs. + // FIXME: impact on integer types noticeable? + octave_idx_type kl = 0, ku = ns; + for (octave_idx_type i = 0; i < ns; i++) + { + T tmp = ov[i*stride + offset]; + if (_sort_isnan (tmp)) + buf[--ku] = tmp; + else + buf[kl++] = tmp; + } - lsort.sort (pvi, ns); - + // sort. + lsort.sort (buf, kl); + + if (ku < ns) + { + // NaNs are in reverse order + std::reverse (buf + ku, buf + ns); + if (mode == DESCENDING) + std::rotate (buf, buf + ku, buf + ns); + } + + // scatter. for (octave_idx_type i = 0; i < ns; i++) - v[i*stride + offset] = pvi[i]; + v[i*stride + offset] = buf[i]; } } @@ -2098,44 +2118,68 @@ T *v = m.fortran_vec (); const T *ov = data (); - octave_sort indexed_sort; + octave_sort lsort; + sidx = Array (dv); + octave_idx_type *vi = sidx.fortran_vec (); + if (mode == ASCENDING) - indexed_sort.set_compare (ascending_compare); + lsort.set_compare (octave_sort::ascending_compare); else if (mode == DESCENDING) - indexed_sort.set_compare (descending_compare); + lsort.set_compare (octave_sort::descending_compare); else abort (); - OCTAVE_LOCAL_BUFFER (const T *, vi, ns); - - sidx = Array (dv); - if (stride == 1) { for (octave_idx_type j = 0; j < iter; j++) { - octave_idx_type offset = j * ns; - - for (octave_idx_type i = 0; i < ns; i++) - vi[i] = ov + i; - - indexed_sort.sort (vi, ns); + // copy and partition out NaNs. + // FIXME: impact on integer types noticeable? + octave_idx_type kl = 0, ku = ns; + for (octave_idx_type i = 0; i < ns; i++) + { + T tmp = ov[i]; + if (_sort_isnan (tmp)) + { + --ku; + v[ku] = tmp; + vi[ku] = i; + } + else + { + v[kl] = tmp; + vi[kl] = i; + kl++; + } + } - for (octave_idx_type i = 0; i < ns; i++) - { - v[i] = *vi[i]; - sidx(i + offset) = vi[i] - ov; - } + // sort. + lsort.sort (v, vi, kl); + + if (ku < ns) + { + // NaNs are in reverse order + std::reverse (v + ku, v + ns); + std::reverse (vi + ku, vi + ns); + if (mode == DESCENDING) + { + std::rotate (v, v + ku, v + ns); + std::rotate (vi, vi + ku, vi + ns); + } + } + v += ns; + vi += ns; ov += ns; } } else { - OCTAVE_LOCAL_BUFFER (T, vix, ns); + OCTAVE_LOCAL_BUFFER (T, buf, ns); + OCTAVE_LOCAL_BUFFER (octave_idx_type, bufi, ns); - for (octave_idx_type j = 0; j < iter; j++) + for (octave_idx_type j = 0; j < iter; j++) { octave_idx_type offset = j; octave_idx_type offset2 = 0; @@ -2147,45 +2191,53 @@ } offset += offset2 * stride * ns; - - for (octave_idx_type i = 0; i < ns; i++) - { - vix[i] = ov[i*stride + offset]; - vi[i] = vix + i; - } + + // gather and partition out NaNs. + // FIXME: impact on integer types noticeable? + octave_idx_type kl = 0, ku = ns; + for (octave_idx_type i = 0; i < ns; i++) + { + T tmp = ov[i*stride + offset]; + if (_sort_isnan (tmp)) + { + --ku; + buf[ku] = tmp; + bufi[ku] = i; + } + else + { + buf[kl] = tmp; + bufi[kl] = i; + kl++; + } + } - indexed_sort.sort (vi, ns); - + // sort. + lsort.sort (buf, bufi, kl); + + if (ku < ns) + { + // NaNs are in reverse order + std::reverse (buf + ku, buf + ns); + std::reverse (bufi + ku, bufi + ns); + if (mode == DESCENDING) + { + std::rotate (buf, buf + ku, buf + ns); + std::rotate (bufi, bufi + ku, bufi + ns); + } + } + + // scatter. for (octave_idx_type i = 0; i < ns; i++) - { - v[i*stride+offset] = *vi[i]; - sidx(i*stride+offset) = vi[i] - vix; - } + v[i*stride + offset] = buf[i]; + for (octave_idx_type i = 0; i < ns; i++) + vi[i*stride + offset] = bufi[i]; } } return m; } -#if defined (HAVE_IEEE754_DATA_FORMAT) - -template <> -bool ascending_compare (double, double); -template <> -bool ascending_compare (const double*, const double*); -template <> -bool descending_compare (double, double); -template <> -bool descending_compare (const double*, const double*); - -template <> -Array Array::sort (octave_idx_type dim, sortmode mode) const; -template <> -Array Array::sort (Array &sidx, - octave_idx_type dim, sortmode mode) const; - -#endif - template Array Array::diag (octave_idx_type k) const diff -r 6e764b7317bd -r 314be237cd5b liboctave/Array.h --- a/liboctave/Array.h Mon Feb 09 01:56:06 2009 -0500 +++ b/liboctave/Array.h Mon Feb 09 13:05:35 2009 +0100 @@ -594,16 +594,8 @@ #define INSTANTIATE_ARRAY_SORT(T) \ template class octave_sort; \ - template class octave_sort; #define NO_INSTANTIATE_ARRAY_SORT(T) \ - template class vec_index; \ - template <> bool ascending_compare (T, T) { return true; } \ - template <> bool ascending_compare (const T *, const T *) \ - { return true; } \ - template <> bool descending_compare (T, T) { return true; } \ - template <> bool descending_compare (const T *, const T *) \ - { return true; } \ template <> Array Array::sort \ (octave_idx_type, sortmode) const { return *this; } \ template <> Array Array::sort (Array &sidx, \ diff -r 6e764b7317bd -r 314be237cd5b liboctave/ChangeLog --- a/liboctave/ChangeLog Mon Feb 09 01:56:06 2009 -0500 +++ b/liboctave/ChangeLog Mon Feb 09 13:05:35 2009 +0100 @@ -1,3 +1,13 @@ +2009-02-09 Jaroslav Hajek + + * oct-sort.cc (octave_sort): Rewrite for optimizations. Allow + inlined comparison functor and by-the-way indexed sorting. + * oct-sort.h (octave_sort): Update interface. + * Array.cc (Array::sort): Reflect changes. Use copy & partition + mechanism. + * Array-d.cc, Array-f.cc, Array-C.cc, Array-fC.cc, Array-s.cc, + Array-i.cc: Reflect changes. + 2009-02-05 John W. Eaton * file-stat.cc (base_file_stat::is_sock): diff -r 6e764b7317bd -r 314be237cd5b liboctave/oct-sort.cc --- a/liboctave/oct-sort.cc Mon Feb 09 01:56:06 2009 -0500 +++ b/liboctave/oct-sort.cc Mon Feb 09 13:05:35 2009 +0100 @@ -1,6 +1,6 @@ /* Copyright (C) 2003, 2004, 2005, 2006, 2007 David Bateman -Copyright (C) 2008 Jaroslav Hajek +Copyright (C) 2008, 2009 Jaroslav Hajek This file is part of Octave. @@ -33,6 +33,10 @@ memmove by std::copy is possible if the destination starts before the source. If not, std::copy_backward needs to be used. +* templatize comparison operator in most methods, provide possible dispatch + +* duplicate methods to avoid by-the-way indexed sorting + The Python license is @@ -98,12 +102,8 @@ #include "quit.h" #include "oct-sort.h" -#ifndef IFLT -#define IFLT(a,b) if (compare ? compare ((a), (b)) : ((a) < (b))) -#endif - template -octave_sort::octave_sort (void) : compare (0) +octave_sort::octave_sort (void) : compare (ascending_compare) { merge_init (); merge_getmem (1024); @@ -116,38 +116,20 @@ merge_getmem (1024); } -/* Reverse a slice of a list in place, from lo up to (exclusive) hi. */ template +template void -octave_sort::reverse_slice (T *lo, T *hi) +octave_sort::binarysort (T *data, octave_idx_type nel, + octave_idx_type start, Comp comp) { - --hi; - while (lo < hi) - { - T t = *lo; - *lo = *hi; - *hi = t; - ++lo; - --hi; - } -} - -template -void -octave_sort::binarysort (T *lo, T *hi, T *start) -{ - T *l, *p, *r; - T pivot; - - if (lo == start) + if (start == 0) ++start; - for (; start < hi; ++start) + for (; start < nel; ++start) { /* set l to where *start belongs */ - l = lo; - r = start; - pivot = *r; + octave_idx_type l = 0, r = start; + T pivot = data[start]; /* Invariants: * pivot >= all in [lo, l). * pivot < all in [r, start). @@ -155,8 +137,8 @@ */ do { - p = l + ((r - l) >> 1); - IFLT (pivot, *p) + octave_idx_type p = l + ((r - l) >> 1); + if (comp (pivot, data[p])) r = p; else l = p+1; @@ -169,9 +151,58 @@ Slide over to make room. Caution: using memmove is much slower under MSVC 5; we're not usually moving many slots. */ - for (p = start; p > l; --p) - *p = *(p-1); - *l = pivot; + // NOTE: using swap and going upwards appears to be faster. + for (octave_idx_type p = l; p < start; p++) + std::swap (pivot, data[p]); + data[start] = pivot; + } + + return; +} + +template +template +void +octave_sort::binarysort (T *data, octave_idx_type *idx, octave_idx_type nel, + octave_idx_type start, Comp comp) +{ + if (start == 0) + ++start; + + for (; start < nel; ++start) + { + /* set l to where *start belongs */ + octave_idx_type l = 0, r = start; + T pivot = data[start]; + /* Invariants: + * pivot >= all in [lo, l). + * pivot < all in [r, start). + * The second is vacuously true at the start. + */ + do + { + octave_idx_type p = l + ((r - l) >> 1); + if (comp (pivot, data[p])) + r = p; + else + l = p+1; + } + while (l < r); + /* The invariants still hold, so pivot >= all in [lo, l) and + pivot < all in [l, start), so pivot belongs at l. Note + that if there are elements equal to pivot, l points to the + first slot after them -- that's why this sort is stable. + Slide over to make room. + Caution: using memmove is much slower under MSVC 5; + we're not usually moving many slots. */ + // NOTE: using swap and going upwards appears to be faster. + for (octave_idx_type p = l; p < start; p++) + std::swap (pivot, data[p]); + data[start] = pivot; + octave_idx_type ipivot = idx[start]; + for (octave_idx_type p = l; p < start; p++) + std::swap (ipivot, idx[p]); + idx[start] = ipivot; } return; @@ -196,10 +227,12 @@ Returns -1 in case of error. */ template +template octave_idx_type -octave_sort::count_run (T *lo, T *hi, bool& descending) +octave_sort::count_run (T *lo, octave_idx_type nel, bool& descending, Comp comp) { octave_idx_type n; + T *hi = lo + nel; descending = false; ++lo; @@ -208,12 +241,12 @@ n = 2; - IFLT (*lo, *(lo-1)) + if (comp (*lo, *(lo-1))) { descending = true; for (lo = lo+1; lo < hi; ++lo, ++n) { - IFLT (*lo, *(lo-1)) + if (comp (*lo, *(lo-1))) ; else break; @@ -223,7 +256,7 @@ { for (lo = lo+1; lo < hi; ++lo, ++n) { - IFLT (*lo, *(lo-1)) + if (comp (*lo, *(lo-1))) break; } } @@ -253,8 +286,10 @@ Returns -1 on error. See listsort.txt for info on the method. */ template +template octave_idx_type -octave_sort::gallop_left (T key, T *a, octave_idx_type n, octave_idx_type hint) +octave_sort::gallop_left (T key, T *a, octave_idx_type n, octave_idx_type hint, + Comp comp) { octave_idx_type ofs; octave_idx_type lastofs; @@ -263,7 +298,7 @@ a += hint; lastofs = 0; ofs = 1; - IFLT (*a, key) + if (comp (*a, key)) { /* a[hint] < key -- gallop right, until * a[hint + lastofs] < key <= a[hint + ofs] @@ -271,7 +306,7 @@ const octave_idx_type maxofs = n - hint; /* &a[n-1] is highest */ while (ofs < maxofs) { - IFLT (a[ofs], key) + if (comp (a[ofs], key)) { lastofs = ofs; ofs = (ofs << 1) + 1; @@ -295,7 +330,7 @@ const octave_idx_type maxofs = hint + 1; /* &a[0] is lowest */ while (ofs < maxofs) { - IFLT (*(a-ofs), key) + if (comp (*(a-ofs), key)) break; /* key <= a[hint - ofs] */ lastofs = ofs; @@ -321,7 +356,7 @@ { octave_idx_type m = lastofs + ((ofs - lastofs) >> 1); - IFLT (a[m], key) + if (comp (a[m], key)) lastofs = m+1; /* a[m] < key */ else ofs = m; /* key <= a[m] */ @@ -345,8 +380,10 @@ written as one routine with yet another "left or right?" flag. */ template +template octave_idx_type -octave_sort::gallop_right (T key, T *a, octave_idx_type n, octave_idx_type hint) +octave_sort::gallop_right (T key, T *a, octave_idx_type n, octave_idx_type hint, + Comp comp) { octave_idx_type ofs; octave_idx_type lastofs; @@ -355,7 +392,7 @@ a += hint; lastofs = 0; ofs = 1; - IFLT (key, *a) + if (comp (key, *a)) { /* key < a[hint] -- gallop left, until * a[hint - ofs] <= key < a[hint - lastofs] @@ -363,7 +400,7 @@ const octave_idx_type maxofs = hint + 1; /* &a[0] is lowest */ while (ofs < maxofs) { - IFLT (key, *(a-ofs)) + if (comp (key, *(a-ofs))) { lastofs = ofs; ofs = (ofs << 1) + 1; @@ -388,7 +425,7 @@ const octave_idx_type maxofs = n - hint; /* &a[n-1] is highest */ while (ofs < maxofs) { - IFLT (key, a[ofs]) + if (comp (key, a[ofs])) break; /* a[hint + ofs] <= key */ lastofs = ofs; @@ -413,7 +450,7 @@ { octave_idx_type m = lastofs + ((ofs - lastofs) >> 1); - IFLT (key, a[m]) + if (comp (key, a[m])) ofs = m; /* key < a[m] */ else lastofs = m+1; /* a[m] <= key */ @@ -428,6 +465,7 @@ octave_sort::merge_init (void) { ms.a = 0; + ms.ia = 0; ms.alloced = 0; ms.n = 0; ms.min_gallop = MIN_GALLOP; @@ -442,6 +480,7 @@ octave_sort::merge_freemem (void) { delete [] ms.a; + delete [] ms.ia; ms.alloced = 0; ms.a = 0; } @@ -509,7 +548,29 @@ return -1; } -#define MERGE_GETMEM(NEED) ((NEED) <= ms.alloced ? 0 : merge_getmem (NEED)) +template +int +octave_sort::merge_getmemi (octave_idx_type need) +{ + if (need <= ms.alloced && ms.a && ms.ia) + return 0; + + need = roundupsize (need); + /* Don't realloc! That can cost cycles to copy the old data, but + * we don't care what's in the block. + */ + merge_freemem (); + ms.a = new T[need]; + ms.ia = new octave_idx_type[need]; + if (ms.a && ms.ia) + { + ms.alloced = need; + return 0; + } + merge_freemem (); /* reset to sane state */ + + return -1; +} /* Merge the na elements starting at pa with the nb elements starting at pb * in a stable way, in-place. na and nb must be > 0, and pa + na == pb. @@ -518,15 +579,18 @@ * Return 0 if successful, -1 if error. */ template +template int -octave_sort::merge_lo (T *pa, octave_idx_type na, T *pb, octave_idx_type nb) +octave_sort::merge_lo (T *pa, octave_idx_type na, + T *pb, octave_idx_type nb, + Comp comp) { octave_idx_type k; T *dest; int result = -1; /* guilty until proved innocent */ octave_idx_type min_gallop = ms.min_gallop; - if (MERGE_GETMEM (na) < 0) + if (merge_getmem (na) < 0) return -1; std::copy (pa, pa + na, ms.a); dest = pa; @@ -550,7 +614,10 @@ for (;;) { - IFLT (*pb, *pa) + // FIXME: these loops are candidates for further optimizations. + // Rather than testing everything in each cycle, it may be more + // efficient to do it in hunks. + if (comp (*pb, *pa)) { *dest++ = *pb++; ++bcount; @@ -584,14 +651,13 @@ { min_gallop -= min_gallop > 1; ms.min_gallop = min_gallop; - k = gallop_right (*pb, pa, na, 0); + k = gallop_right (*pb, pa, na, 0, comp); acount = k; if (k) { if (k < 0) goto Fail; - std::copy (pa, pa + k, dest); - dest += k; + dest = std::copy (pa, pa + k, dest); pa += k; na -= k; if (na == 1) @@ -608,14 +674,13 @@ if (nb == 0) goto Succeed; - k = gallop_left (*pa, pb, nb, 0); + k = gallop_left (*pa, pb, nb, 0, comp); bcount = k; if (k) { if (k < 0) goto Fail; - std::copy (pb, pb + k, dest); - dest += k; + dest = std::copy (pb, pb + k, dest); pb += k; nb -= k; if (nb == 0) @@ -648,6 +713,147 @@ return 0; } +template +template +int +octave_sort::merge_lo (T *pa, octave_idx_type *ipa, octave_idx_type na, + T *pb, octave_idx_type *ipb, octave_idx_type nb, + Comp comp) +{ + octave_idx_type k; + T *dest; + octave_idx_type *idest; + int result = -1; /* guilty until proved innocent */ + octave_idx_type min_gallop = ms.min_gallop; + + if (merge_getmemi (na) < 0) + return -1; + std::copy (pa, pa + na, ms.a); + std::copy (ipa, ipa + na, ms.ia); + dest = pa; idest = ipa; + pa = ms.a; ipa = ms.ia; + + *dest++ = *pb++; *idest++ = *ipb++; + --nb; + if (nb == 0) + goto Succeed; + if (na == 1) + goto CopyB; + + for (;;) + { + octave_idx_type acount = 0; /* # of times A won in a row */ + octave_idx_type bcount = 0; /* # of times B won in a row */ + + /* Do the straightforward thing until (if ever) one run + * appears to win consistently. + */ + for (;;) + { + + if (comp (*pb, *pa)) + { + *dest++ = *pb++; *idest++ = *ipb++; + ++bcount; + acount = 0; + --nb; + if (nb == 0) + goto Succeed; + if (bcount >= min_gallop) + break; + } + else + { + *dest++ = *pa++; *idest++ = *ipa++; + ++acount; + bcount = 0; + --na; + if (na == 1) + goto CopyB; + if (acount >= min_gallop) + break; + } + } + + /* One run is winning so consistently that galloping may + * be a huge win. So try that, and continue galloping until + * (if ever) neither run appears to be winning consistently + * anymore. + */ + ++min_gallop; + do + { + min_gallop -= min_gallop > 1; + ms.min_gallop = min_gallop; + k = gallop_right (*pb, pa, na, 0, comp); + acount = k; + if (k) + { + if (k < 0) + goto Fail; + dest = std::copy (pa, pa + k, dest); + idest = std::copy (ipa, ipa + k, idest); + pa += k; ipa += k; + na -= k; + if (na == 1) + goto CopyB; + /* na==0 is impossible now if the comparison + * function is consistent, but we can't assume + * that it is. + */ + if (na == 0) + goto Succeed; + } + *dest++ = *pb++; *idest++ = *ipb++; + --nb; + if (nb == 0) + goto Succeed; + + k = gallop_left (*pa, pb, nb, 0, comp); + bcount = k; + if (k) + { + if (k < 0) + goto Fail; + dest = std::copy (pb, pb + k, dest); + idest = std::copy (ipb, ipb + k, idest); + pb += k; ipb += k; + nb -= k; + if (nb == 0) + goto Succeed; + } + *dest++ = *pa++; *idest++ = *ipa++; + --na; + if (na == 1) + goto CopyB; + } + while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP); + + ++min_gallop; /* penalize it for leaving galloping mode */ + ms.min_gallop = min_gallop; + } + + Succeed: + result = 0; + + Fail: + if (na) + { + std::copy (pa, pa + na, dest); + std::copy (ipa, ipa + na, idest); + } + return result; + + CopyB: + /* The last element of pa belongs at the end of the merge. */ + std::copy (pb, pb + nb, dest); + std::copy (ipb, ipb + nb, idest); + dest[nb] = *pa; + idest[nb] = *ipa; + + return 0; +} + /* Merge the na elements starting at pa with the nb elements starting at pb * in a stable way, in-place. na and nb must be > 0, and pa + na == pb. * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the @@ -655,17 +861,19 @@ * Return 0 if successful, -1 if error. */ template +template int -octave_sort::merge_hi (T *pa, octave_idx_type na, T *pb, octave_idx_type nb) +octave_sort::merge_hi (T *pa, octave_idx_type na, + T *pb, octave_idx_type nb, + Comp comp) { octave_idx_type k; T *dest; int result = -1; /* guilty until proved innocent */ - T *basea; - T *baseb; + T *basea, *baseb; octave_idx_type min_gallop = ms.min_gallop; - if (MERGE_GETMEM (nb) < 0) + if (merge_getmem (nb) < 0) return -1; dest = pb + nb - 1; std::copy (pb, pb + nb, ms.a); @@ -691,7 +899,7 @@ */ for (;;) { - IFLT (*pb, *pa) + if (comp (*pb, *pa)) { *dest-- = *pa--; ++acount; @@ -725,16 +933,15 @@ { min_gallop -= min_gallop > 1; ms.min_gallop = min_gallop; - k = gallop_right (*pb, basea, na, na-1); + k = gallop_right (*pb, basea, na, na-1, comp); if (k < 0) goto Fail; k = na - k; acount = k; if (k) { - dest -= k; + dest = std::copy_backward (pa+1 - k, pa+1, dest+1) - 1; pa -= k; - std::copy_backward (pa+1, pa+1 + k, dest+1 + k); na -= k; if (na == 0) goto Succeed; @@ -744,7 +951,7 @@ if (nb == 1) goto CopyA; - k = gallop_left (*pa, baseb, nb, nb-1); + k = gallop_left (*pa, baseb, nb, nb-1, comp); if (k < 0) goto Fail; k = nb - k; @@ -783,28 +990,176 @@ CopyA: /* The first element of pb belongs at the front of the merge. */ - dest -= na; + dest = std::copy_backward (pa+1 - na, pa+1, dest+1) - 1; pa -= na; - std::copy_backward (pa+1, pa+1 + na, dest+1 + na); *dest = *pb; return 0; } +template +template +int +octave_sort::merge_hi (T *pa, octave_idx_type *ipa, octave_idx_type na, + T *pb, octave_idx_type *ipb, octave_idx_type nb, + Comp comp) +{ + octave_idx_type k; + T *dest; + octave_idx_type *idest; + int result = -1; /* guilty until proved innocent */ + T *basea, *baseb; + octave_idx_type *ibasea, *ibaseb; + octave_idx_type min_gallop = ms.min_gallop; + + if (merge_getmemi (nb) < 0) + return -1; + dest = pb + nb - 1; + idest = ipb + nb - 1; + std::copy (pb, pb + nb, ms.a); + std::copy (ipb, ipb + nb, ms.ia); + basea = pa; ibasea = ipa; + baseb = ms.a; ibaseb = ms.ia; + pb = ms.a + nb - 1; ipb = ms.ia + nb - 1; + pa += na - 1; ipa += na - 1; + + *dest-- = *pa--; *idest-- = *ipa--; + --na; + if (na == 0) + goto Succeed; + if (nb == 1) + goto CopyA; + + for (;;) + { + octave_idx_type acount = 0; /* # of times A won in a row */ + octave_idx_type bcount = 0; /* # of times B won in a row */ + + /* Do the straightforward thing until (if ever) one run + * appears to win consistently. + */ + for (;;) + { + if (comp (*pb, *pa)) + { + *dest-- = *pa--; *idest-- = *ipa--; + ++acount; + bcount = 0; + --na; + if (na == 0) + goto Succeed; + if (acount >= min_gallop) + break; + } + else + { + *dest-- = *pb--; *idest-- = *ipb--; + ++bcount; + acount = 0; + --nb; + if (nb == 1) + goto CopyA; + if (bcount >= min_gallop) + break; + } + } + + /* One run is winning so consistently that galloping may + * be a huge win. So try that, and continue galloping until + * (if ever) neither run appears to be winning consistently + * anymore. + */ + ++min_gallop; + do + { + min_gallop -= min_gallop > 1; + ms.min_gallop = min_gallop; + k = gallop_right (*pb, basea, na, na-1, comp); + if (k < 0) + goto Fail; + k = na - k; + acount = k; + if (k) + { + dest = std::copy_backward (pa+1 - k, pa+1, dest+1) - 1; + idest = std::copy_backward (ipa+1 - k, ipa+1, idest+1) - 1; + pa -= k; ipa -= k; + na -= k; + if (na == 0) + goto Succeed; + } + *dest-- = *pb--; *idest-- = *ipb--; + --nb; + if (nb == 1) + goto CopyA; + + k = gallop_left (*pa, baseb, nb, nb-1, comp); + if (k < 0) + goto Fail; + k = nb - k; + bcount = k; + if (k) + { + dest -= k; idest -= k; + pb -= k; ipb -= k; + std::copy (pb+1, pb+1 + k, dest+1); + std::copy (ipb+1, ipb+1 + k, idest+1); + nb -= k; + if (nb == 1) + goto CopyA; + /* nb==0 is impossible now if the comparison + * function is consistent, but we can't assume + * that it is. + */ + if (nb == 0) + goto Succeed; + } + *dest-- = *pa--; *idest-- = *ipa--; + --na; + if (na == 0) + goto Succeed; + } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP); + ++min_gallop; /* penalize it for leaving galloping mode */ + ms.min_gallop = min_gallop; + } + +Succeed: + result = 0; + +Fail: + if (nb) + { + std::copy (baseb, baseb + nb, dest-(nb-1)); + std::copy (ibaseb, ibaseb + nb, idest-(nb-1)); + } + return result; + +CopyA: + /* The first element of pb belongs at the front of the merge. */ + dest = std::copy_backward (pa+1 - na, pa+1, dest+1) - 1; + idest = std::copy_backward (ipa+1 - na, ipa+1, idest+1) - 1; + pa -= na; ipa -= na; + *dest = *pb; *idest = *ipb; + + return 0; +} + /* Merge the two runs at stack indices i and i+1. * Returns 0 on success, -1 on error. */ template +template int -octave_sort::merge_at (octave_idx_type i) +octave_sort::merge_at (octave_idx_type i, T *data, + Comp comp) { T *pa, *pb; octave_idx_type na, nb; octave_idx_type k; - pa = ms.pending[i].base; + pa = data + ms.pending[i].base; na = ms.pending[i].len; - pb = ms.pending[i+1].base; + pb = data + ms.pending[i+1].base; nb = ms.pending[i+1].len; /* Record the length of the combined runs; if i is the 3rd-last @@ -819,7 +1174,7 @@ /* Where does b start in a? Elements in a before that can be * ignored (already in place). */ - k = gallop_right (*pb, pa, na, 0); + k = gallop_right (*pb, pa, na, 0, comp); if (k < 0) return -1; pa += k; @@ -830,7 +1185,7 @@ /* Where does a end in b? Elements in b after that can be * ignored (already in place). */ - nb = gallop_left (pa[na-1], pb, nb, nb-1); + nb = gallop_left (pa[na-1], pb, nb, nb-1, comp); if (nb <= 0) return nb; @@ -838,9 +1193,63 @@ * min(na, nb) elements. */ if (na <= nb) - return merge_lo (pa, na, pb, nb); + return merge_lo (pa, na, pb, nb, comp); else - return merge_hi (pa, na, pb, nb); + return merge_hi (pa, na, pb, nb, comp); +} + +template +template +int +octave_sort::merge_at (octave_idx_type i, T *data, octave_idx_type *idx, + Comp comp) +{ + T *pa, *pb; + octave_idx_type *ipa, *ipb; + octave_idx_type na, nb; + octave_idx_type k; + + pa = data + ms.pending[i].base; + ipa = idx + ms.pending[i].base; + na = ms.pending[i].len; + pb = data + ms.pending[i+1].base; + ipb = idx + ms.pending[i+1].base; + nb = ms.pending[i+1].len; + + /* Record the length of the combined runs; if i is the 3rd-last + * run now, also slide over the last run (which isn't involved + * in this merge). The current run i+1 goes away in any case. + */ + ms.pending[i].len = na + nb; + if (i == ms.n - 3) + ms.pending[i+1] = ms.pending[i+2]; + --ms.n; + + /* Where does b start in a? Elements in a before that can be + * ignored (already in place). + */ + k = gallop_right (*pb, pa, na, 0, comp); + if (k < 0) + return -1; + pa += k; ipa += k; + na -= k; + if (na == 0) + return 0; + + /* Where does a end in b? Elements in b after that can be + * ignored (already in place). + */ + nb = gallop_left (pa[na-1], pb, nb, nb-1, comp); + if (nb <= 0) + return nb; + + /* Merge what remains of the runs, using a temp array with + * min(na, nb) elements. + */ + if (na <= nb) + return merge_lo (pa, ipa, na, pb, ipb, nb, comp); + else + return merge_hi (pa, ipa, na, pb, ipb, nb, comp); } /* Examine the stack of runs waiting to be merged, merging adjacent runs @@ -854,8 +1263,9 @@ * Returns 0 on success, -1 on error. */ template +template int -octave_sort::merge_collapse (void) +octave_sort::merge_collapse (T *data, Comp comp) { struct s_slice *p = ms.pending; @@ -866,12 +1276,41 @@ { if (p[n-1].len < p[n+1].len) --n; - if (merge_at (n) < 0) + if (merge_at (n, data, comp) < 0) return -1; } else if (p[n].len <= p[n+1].len) { - if (merge_at (n) < 0) + if (merge_at (n, data, comp) < 0) + return -1; + } + else + break; + } + + return 0; +} + +template +template +int +octave_sort::merge_collapse (T *data, octave_idx_type *idx, Comp comp) +{ + struct s_slice *p = ms.pending; + + while (ms.n > 1) + { + octave_idx_type n = ms.n - 2; + if (n > 0 && p[n-1].len <= p[n].len + p[n+1].len) + { + if (p[n-1].len < p[n+1].len) + --n; + if (merge_at (n, data, idx, comp) < 0) + return -1; + } + else if (p[n].len <= p[n+1].len) + { + if (merge_at (n, data, idx, comp) < 0) return -1; } else @@ -887,8 +1326,9 @@ * Returns 0 on success, -1 on error. */ template +template int -octave_sort::merge_force_collapse (void) +octave_sort::merge_force_collapse (T *data, Comp comp) { struct s_slice *p = ms.pending; @@ -897,7 +1337,26 @@ octave_idx_type n = ms.n - 2; if (n > 0 && p[n-1].len < p[n+1].len) --n; - if (merge_at (n) < 0) + if (merge_at (n, data, comp) < 0) + return -1; + } + + return 0; +} + +template +template +int +octave_sort::merge_force_collapse (T *data, octave_idx_type *idx, Comp comp) +{ + struct s_slice *p = ms.pending; + + while (ms.n > 1) + { + octave_idx_type n = ms.n - 2; + if (n > 0 && p[n-1].len < p[n+1].len) + --n; + if (merge_at (n, data, idx, comp) < 0) return -1; } @@ -930,18 +1389,75 @@ } template +template void -octave_sort::sort (T *v, octave_idx_type elements) +octave_sort::sort (T *data, octave_idx_type nel, Comp comp) { /* Re-initialize the Mergestate as this might be the second time called */ ms.n = 0; ms.min_gallop = MIN_GALLOP; - if (elements > 1) + if (nel > 1) { - octave_idx_type nremaining = elements; - T *lo = v; - T *hi = v + elements; + octave_idx_type nremaining = nel; + octave_idx_type lo = 0; + + /* March over the array once, left to right, finding natural runs, + * and extending short natural runs to minrun elements. + */ + octave_idx_type minrun = merge_compute_minrun (nremaining); + do + { + bool descending; + octave_idx_type n; + + /* Identify next run. */ + n = count_run (data + lo, nremaining, descending, comp); + if (n < 0) + goto fail; + if (descending) + std::reverse (data + lo, data + lo + n); + /* If short, extend to min(minrun, nremaining). */ + if (n < minrun) + { + const octave_idx_type force = nremaining <= minrun ? nremaining : minrun; + binarysort (data + lo, force, n, comp); + n = force; + } + /* Push run onto pending-runs stack, and maybe merge. */ + assert (ms.n < MAX_MERGE_PENDING); + ms.pending[ms.n].base = lo; + ms.pending[ms.n].len = n; + ++ms.n; + if (merge_collapse (data, comp) < 0) + goto fail; + /* Advance to find next run. */ + lo += n; + nremaining -= n; + } + while (nremaining); + + merge_force_collapse (data, comp); + } + +fail: + return; +} + +template +template +void +octave_sort::sort (T *data, octave_idx_type *idx, octave_idx_type nel, + Comp comp) +{ + /* Re-initialize the Mergestate as this might be the second time called */ + ms.n = 0; + ms.min_gallop = MIN_GALLOP; + + if (nel > 1) + { + octave_idx_type nremaining = nel; + octave_idx_type lo = 0; /* March over the array once, left to right, finding natural runs, * and extending short natural runs to minrun elements. @@ -953,16 +1469,19 @@ octave_idx_type n; /* Identify next run. */ - n = count_run (lo, hi, descending); + n = count_run (data + lo, nremaining, descending, comp); if (n < 0) goto fail; if (descending) - reverse_slice (lo, lo + n); + { + std::reverse (data + lo, data + lo + n); + std::reverse (idx + lo, idx + lo + n); + } /* If short, extend to min(minrun, nremaining). */ if (n < minrun) { const octave_idx_type force = nremaining <= minrun ? nremaining : minrun; - binarysort (lo, lo + force, lo + n); + binarysort (data + lo, idx + lo, force, n, comp); n = force; } /* Push run onto pending-runs stack, and maybe merge. */ @@ -970,7 +1489,7 @@ ms.pending[ms.n].base = lo; ms.pending[ms.n].len = n; ++ms.n; - if (merge_collapse () < 0) + if (merge_collapse (data, idx, comp) < 0) goto fail; /* Advance to find next run. */ lo += n; @@ -978,13 +1497,63 @@ } while (nremaining); - merge_force_collapse (); + merge_force_collapse (data, idx, comp); } fail: return; } +template +void +octave_sort::sort (T *data, octave_idx_type nel) +{ +#ifdef INLINE_ASCENDING_SORT + if (compare == ascending_compare) + sort (data, nel, std::less ()); + else +#endif +#ifdef INLINE_DESCENDING_SORT + if (compare == descending_compare) + sort (data, nel, std::greater ()); + else +#endif + if (compare) + sort (data, nel, compare); +} + +template +void +octave_sort::sort (T *data, octave_idx_type *idx, octave_idx_type nel) +{ +#ifdef INLINE_ASCENDING_SORT + if (compare == ascending_compare) + sort (data, idx, nel, std::less ()); + else +#endif +#ifdef INLINE_DESCENDING_SORT + if (compare == descending_compare) + sort (data, idx, nel, std::greater ()); + else +#endif + if (compare) + sort (data, idx, nel, compare); +} + +template +bool +octave_sort::ascending_compare (T x, T y) +{ + return x < y; +} + +template +bool +octave_sort::descending_compare (T x, T y) +{ + return x > y; +} + /* ;;; Local Variables: *** ;;; mode: C++ *** diff -r 6e764b7317bd -r 314be237cd5b liboctave/oct-sort.h --- a/liboctave/oct-sort.h Mon Feb 09 01:56:06 2009 -0500 +++ b/liboctave/oct-sort.h Mon Feb 09 13:05:35 2009 +0100 @@ -113,7 +113,12 @@ void set_compare (bool (*comp) (T, T)) { compare = comp; } - void sort (T *v, octave_idx_type elements); + void sort (T *data, octave_idx_type nel); + + void sort (T *data, octave_idx_type *idx, octave_idx_type nel); + + static bool ascending_compare (T, T); + static bool descending_compare (T, T); private: @@ -127,8 +132,7 @@ struct s_slice { - T *base; - octave_idx_type len; + octave_idx_type base, len; }; struct MergeState @@ -142,6 +146,7 @@ // 'a' is temp storage to help with merges. It contains room for // alloced entries. T *a; // may point to temparray below + octave_idx_type *ia; octave_idx_type alloced; // A stack of n pending runs yet to be merged. Run #i starts at @@ -160,15 +165,25 @@ MergeState ms; - void reverse_slice (T *lo, T *hi); - - void binarysort (T *lo, T *hi, T *start); + + template + void binarysort (T *data, octave_idx_type nel, + octave_idx_type start, Comp comp); + + template + void binarysort (T *data, octave_idx_type *idx, octave_idx_type nel, + octave_idx_type start, Comp comp); - octave_idx_type count_run (T *lo, T *hi, bool& descending); + template + octave_idx_type count_run (T *lo, octave_idx_type n, bool& descending, Comp comp); - octave_idx_type gallop_left (T key, T *a, octave_idx_type n, octave_idx_type hint); + template + octave_idx_type gallop_left (T key, T *a, octave_idx_type n, octave_idx_type hint, + Comp comp); - octave_idx_type gallop_right (T key, T *a, octave_idx_type n, octave_idx_type hint); + template + octave_idx_type gallop_right (T key, T *a, octave_idx_type n, octave_idx_type hint, + Comp comp); void merge_init (void); @@ -176,17 +191,56 @@ int merge_getmem (octave_idx_type need); - int merge_lo (T *pa, octave_idx_type na, T *pb, octave_idx_type nb); + int merge_getmemi (octave_idx_type need); + + template + int merge_lo (T *pa, octave_idx_type na, + T *pb, octave_idx_type nb, + Comp comp); - int merge_hi (T *pa, octave_idx_type na, T *pb, octave_idx_type nb); + template + int merge_lo (T *pa, octave_idx_type *ipa, octave_idx_type na, + T *pb, octave_idx_type *ipb, octave_idx_type nb, + Comp comp); + + template + int merge_hi (T *pa, octave_idx_type na, + T *pb, octave_idx_type nb, + Comp comp); - int merge_at (octave_idx_type i); + template + int merge_hi (T *pa, octave_idx_type *ipa, octave_idx_type na, + T *pb, octave_idx_type *ipb, octave_idx_type nb, + Comp comp); + + template + int merge_at (octave_idx_type i, T *data, + Comp comp); - int merge_collapse (void); + template + int merge_at (octave_idx_type i, T *data, octave_idx_type *idx, + Comp comp); + + template + int merge_collapse (T *data, Comp comp); - int merge_force_collapse (void); + template + int merge_collapse (T *data, octave_idx_type *idx, Comp comp); + + template + int merge_force_collapse (T *data, Comp comp); + + template + int merge_force_collapse (T *data, octave_idx_type *idx, Comp comp); octave_idx_type merge_compute_minrun (octave_idx_type n); + + template + void sort (T *data, octave_idx_type nel, Comp comp); + + template + void sort (T *data, octave_idx_type *idx, octave_idx_type nel, Comp comp); + }; template diff -r 6e764b7317bd -r 314be237cd5b src/ChangeLog --- a/src/ChangeLog Mon Feb 09 01:56:06 2009 -0500 +++ b/src/ChangeLog Mon Feb 09 13:05:35 2009 +0100 @@ -1,3 +1,7 @@ +2009-02-09 Jaroslav Hajek + + * TEMPLATE-INST/Array-tc.cc: Reflect changes in octave_sort. + 2009-02-08 John W. Eaton * octave.cc (initialize_pathsearch): Delete. diff -r 6e764b7317bd -r 314be237cd5b src/TEMPLATE-INST/Array-tc.cc --- a/src/TEMPLATE-INST/Array-tc.cc Mon Feb 09 01:56:06 2009 -0500 +++ b/src/TEMPLATE-INST/Array-tc.cc Mon Feb 09 13:05:35 2009 +0100 @@ -40,38 +40,22 @@ #include "oct-obj.h" -#define IFLT(a, b) if (compare ? compare ((a), (b)) : true) - #include "oct-sort.cc" template <> bool -ascending_compare (octave_value a, octave_value b) +octave_sort::ascending_compare (octave_value a, octave_value b) { return (a.string_value () < b.string_value ()); } template <> bool -ascending_compare (const octave_value *a, const octave_value *b) -{ - return (a->string_value () < b->string_value ()); -} - -template <> -bool -descending_compare (octave_value a, octave_value b) +octave_sort::descending_compare (octave_value a, octave_value b) { return (a.string_value () > b.string_value ()); } -template <> -bool -descending_compare (const octave_value *a, const octave_value *b) -{ - return (a->string_value () > b->string_value ()); -} - INSTANTIATE_ARRAY_SORT (octave_value); template class OCTINTERP_API Array;