changeset 9407:0951174cbb03

remove experimental stuff from lookup, simplify
author Jaroslav Hajek <highegg@gmail.com>
date Mon, 29 Jun 2009 14:07:32 +0200
parents c0c23dbbade7
children 6729708602ca
files ChangeLog NEWS liboctave/ChangeLog liboctave/oct-sort.cc liboctave/oct-sort.h
diffstat 5 files changed, 110 insertions(+), 144 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog	Sun Jun 28 23:00:53 2009 +0100
+++ b/ChangeLog	Mon Jun 29 14:07:32 2009 +0200
@@ -1,3 +1,7 @@
+2009-06-29  Jaroslav Hajek  <highegg@gmail.com>
+
+	* NEWS: Correct info.
+
 2009-06-26  Michael Goffioul  <michael.goffioul@gmail.com>
 
 	* aclocal.m4: Add pkg.m4 macros.
--- a/NEWS	Sun Jun 28 23:00:53 2009 +0100
+++ b/NEWS	Mon Jun 29 14:07:32 2009 +0200
@@ -3,9 +3,7 @@
 
  ** The `lookup' function was extended to be more useful for general-purpose
     binary searching. Using this improvement, the ismember function was
-    rewritten for significantly better performance. The underlying algorithm
-    of lookup was also improved and is now much faster in certain important 
-    cases. 
+    rewritten for significantly better performance.
 
  ** Real, integer and logical matrices, when used in indexing, will now
     cache the internal index_vector value (zero-based indices) when
--- a/liboctave/ChangeLog	Sun Jun 28 23:00:53 2009 +0100
+++ b/liboctave/ChangeLog	Mon Jun 29 14:07:32 2009 +0200
@@ -1,3 +1,10 @@
+2009-06-29  Jaroslav Hajek  <highegg@gmail.com>
+
+	* oct-sort.cc (octave_sort<T>::lookup_merge): Delete.
+	(octave_sort<T>::lookup<Comp>,
+	octave_sort<T>::lookupm<Comp>,
+	octave_sort<T>::lookupb<Comp>): Rewrite.
+
 2009-06-26  Michael Goffioul  <michael.goffioul@gmail.com>
 
 	* pathsearch.h (class dir_path::static_members): Decorate with
--- a/liboctave/oct-sort.cc	Sun Jun 28 23:00:53 2009 +0100
+++ b/liboctave/oct-sort.cc	Mon Jun 29 14:07:32 2009 +0200
@@ -1742,12 +1742,14 @@
   return retval;
 }
 
-// A helper function (just to avoid repeating expressions).
+// The simple binary lookup.
 template <class T, class Comp>
 inline octave_idx_type
-lookup_impl (const T *data, octave_idx_type lo,
-             octave_idx_type hi, const T& val, Comp comp)
+lookup_binary (const T *data, octave_idx_type hi, 
+               const T& val, Comp comp)
 {
+  octave_idx_type lo = 0;
+
   while (lo < hi)
     {
       octave_idx_type mid = lo + ((hi-lo) >> 1);
@@ -1760,13 +1762,12 @@
   return lo;
 }
 
-
 template <class T> template <class Comp>
 octave_idx_type 
 octave_sort<T>::lookup (const T *data, octave_idx_type nel,
                         const T& value, Comp comp)
 {
-  return lookup_impl (data, 0, nel, value, comp);
+  return lookup_binary (data, nel, value, comp);
 }
 
 template <class T>
@@ -1792,105 +1793,8 @@
   return retval;
 }
 
-// A recursively splitting lookup of a sorted array in another sorted array.
-template <class T> template <class Comp>
-void 
-octave_sort<T>::lookup_merge (const T *data, octave_idx_type lo, octave_idx_type hi,
-                              const T *values, octave_idx_type nvalues,
-                              octave_idx_type *idx, Comp comp)
-{
-  // Fake entry.
-begin:
-
-  if (hi - lo <= nvalues + 16)
-    {
-      // Do a linear merge.
-      octave_idx_type i = lo, j = 0;
-
-      if (j != nvalues && i != hi)
-        {
-          while (true)
-            {
-              if (comp (values[j], data[i]))
-                {
-                  idx[j] = i;
-                  if (++j == nvalues)
-                    break;
-                }
-              else if (++i == hi)
-                break;
-            }
-        }
-
-      while (j != nvalues)
-        idx[j++] = i;
-    }
-  else
-    {
-      // Use the ordering to split the table; look-up recursively.
-      switch (nvalues)
-        {
-        case 0:
-          break;
-        case 1:
-          idx[0] = lookup_impl (data,     lo,     hi, values[0], comp);
-          break;
-        case 2:
-          idx[0] = lookup_impl (data,     lo,     hi, values[0], comp);
-          idx[1] = lookup_impl (data, idx[0],     hi, values[1], comp);
-          break;
-        case 3:
-          idx[1] = lookup_impl (data,     lo,     hi, values[1], comp);
-          idx[0] = lookup_impl (data,     lo, idx[1], values[0], comp);
-          idx[2] = lookup_impl (data, idx[1],     hi, values[2], comp);
-          break;
-        case 4:
-          idx[2] = lookup_impl (data,     lo,     hi, values[2], comp);
-          idx[0] = lookup_impl (data,     lo, idx[2], values[0], comp);
-          idx[1] = lookup_impl (data, idx[0], idx[2], values[1], comp);
-          idx[3] = lookup_impl (data, idx[2],     hi, values[3], comp);
-          break;
-        case 5:
-          idx[2] = lookup_impl (data,     lo,     hi, values[2], comp);
-          idx[0] = lookup_impl (data,     lo, idx[2], values[0], comp);
-          idx[1] = lookup_impl (data, idx[0], idx[2], values[1], comp);
-          idx[3] = lookup_impl (data, idx[2],     hi, values[3], comp);
-          idx[4] = lookup_impl (data, idx[3],     hi, values[4], comp);
-          break;
-        default:
-            {
-              // This is a bit tricky. We do not want a normal recursion
-              // because we split by idx[k], and there's no guaranteed descend
-              // ratio; hence the worst-case scenario would be a linear stack
-              // growth, which is dangerous. Instead, we recursively run the
-              // smaller half, and simulate a tail call for the rest using
-              // goto.
-              octave_idx_type k = nvalues / 2;
-              idx[k] = lookup_impl (data, lo, hi, values[k], comp);
-              if (idx[k] - lo <= hi - idx[k])
-                {
-                  // The smaller portion is run recursively.
-                  lookup_merge (data, lo, idx[k], values, k, idx, comp);
-                  // Simulate a tail call.
-                  lo = idx[k];
-                  values += k; nvalues -= k; idx += k;
-                  goto begin;
-                }
-              else
-                {
-                  // The smaller portion is run recursively.
-                  lookup_merge (data, idx[k], hi, 
-                                values + k, nvalues - k, idx + k, comp);
-                  // Simulate a tail call.
-                  hi = idx[k];
-                  nvalues = k;
-                  goto begin;
-                }
-              break;
-            }
-        }
-    }
-}
+// This determines the split ratio between the O(M*log2(N)) and O(M+N) algorithms.
+static const double ratio = 1.0;
 
 template <class T> template <class Comp>
 void 
@@ -1898,39 +1802,37 @@
                         const T *values, octave_idx_type nvalues,
                         octave_idx_type *idx, octave_idx_type offset, Comp comp)
 {
-  if (nel == 0)
-    // the trivial case of empty table
-    std::fill_n (idx, nvalues, offset);
-  else
+  // Check whether we're comparing two sorted arrays, comparable in size.
+  if (nvalues >= ratio * nel / xlog2 (nel + 1.0) 
+      && is_sorted (values, nvalues, comp))
     {
-      octave_idx_type vlo = 0;
-      int nbadruns = 0;
-      while (vlo != nvalues && nbadruns < 16)
+      // Use the linear algorithm.
+      octave_idx_type i = 0, j = 0;
+
+      if (j != nvalues && i != nel)
         {
-          octave_idx_type vhi;
-
-          // Determine a sorted run.
-          for (vhi = vlo + 1; vhi != nvalues; vhi++)
+          while (true)
             {
-              if (comp (values[vhi], values[vhi-1]))
+              if (comp (values[j], data[i]))
+                {
+                  idx[j] = i + offset;
+                  if (++j == nvalues)
+                    break;
+                }
+              else if (++i == nel)
                 break;
             }
-
-          // Do a recursive lookup.
-          lookup_merge (data - offset, offset, nel + offset,
-                        values + vlo, vhi - vlo, idx + vlo, comp);
-
-          if (vhi - vlo <= 2)
-            nbadruns++;
-          else if (vhi - vlo >= 16)
-            nbadruns = 0;
-
-          vlo = vhi;
         }
 
-      // The optimistic loop terminated, do the rest normally.
-      for (; vlo != nvalues; vlo++)
-        idx[vlo] = lookup_impl (data, 0, nel, values[vlo], comp) + offset;
+      for (; j != nvalues; j++)
+        idx[j] = i + offset;
+
+    }
+  else
+    {
+      // Use a sequence of binary lookups.
+      for (octave_idx_type j = 0; j < nvalues; j++)
+        idx[j] = lookup_binary (data, nel, values[j], comp) + offset;
     }
 }
 
@@ -1960,10 +1862,40 @@
                          const T *values, octave_idx_type nvalues,
                          octave_idx_type *idx, Comp comp)
 {
-  for (octave_idx_type i = 0; i < nvalues; i++)
+  // Check whether we're comparing two sorted arrays, comparable in size.
+  if (nvalues >= ratio * nel / xlog2 (nel + 1.0) 
+      && is_sorted (values, nvalues, comp))
     {
-      octave_idx_type j = lookup_impl (data, 0, nel, values[i], comp);
-      idx[i] = (j != 0 && comp (data[j-1], values[i])) ? -1 : j-1;
+      // Use the linear algorithm.
+      octave_idx_type i = 0, j = 0;
+
+      if (j != nvalues && i != nel)
+        {
+          while (true)
+            {
+              if (comp (values[j], data[i]))
+                {
+                  idx[j] = (i != 0 && comp (data[i-1], values[j])) ? -1 : i-1;
+                  if (++j == nvalues)
+                    break;
+                }
+              else if (++i == nel)
+                break;
+            }
+        }
+
+      for (; j != nvalues; j++)
+        idx[j] = (i != 0 && comp (data[i-1], values[j])) ? -1 : i-1;
+
+    }
+  else
+    {
+      // Use a sequence of binary lookups.
+      for (octave_idx_type j = 0; j < nvalues; j++)
+        {
+          octave_idx_type i = lookup_binary (data, nel, values[j], comp);
+          idx[j] = (i != 0 && comp (data[i-1], values[j])) ? -1 : i-1;
+        }
     }
 }
 
@@ -1993,10 +1925,40 @@
                          const T *values, octave_idx_type nvalues,
                          bool *match, Comp comp)
 {
-  for (octave_idx_type i = 0; i < nvalues; i++)
+  // Check whether we're comparing two sorted arrays, comparable in size.
+  if (nvalues >= ratio * nel / xlog2 (nel + 1.0) 
+      && is_sorted (values, nvalues, comp))
     {
-      octave_idx_type j = lookup_impl (data, 0, nel, values[i], comp);
-      match[i] = (j != 0 && ! comp (data[j-1], values[i]));
+      // Use the linear algorithm.
+      octave_idx_type i = 0, j = 0;
+
+      if (j != nvalues && i != nel)
+        {
+          while (true)
+            {
+              if (comp (values[j], data[i]))
+                {
+                  match[j] = (i != 0 && ! comp (data[i-1], values[j]));
+                  if (++j == nvalues)
+                    break;
+                }
+              else if (++i == nel)
+                break;
+            }
+        }
+
+      for (; j != nvalues; j++)
+        match[j] = (i != 0 && ! comp (data[i-1], values[j]));
+
+    }
+  else
+    {
+      // Use a sequence of binary lookups.
+      for (octave_idx_type j = 0; j < nvalues; j++)
+        {
+          octave_idx_type i = lookup_binary (data, nel, values[j], comp);
+          match[j] = (j != 0 && ! comp (data[i-1], values[j]));
+        }
     }
 }
 
--- a/liboctave/oct-sort.h	Sun Jun 28 23:00:53 2009 +0100
+++ b/liboctave/oct-sort.h	Mon Jun 29 14:07:32 2009 +0200
@@ -309,11 +309,6 @@
                           const T& value, Comp comp);
 
   template <class Comp>
-  void lookup_merge (const T *data, octave_idx_type lo, octave_idx_type hi,
-                     const T* values, octave_idx_type nvalues,
-                     octave_idx_type *idx, Comp comp);
-
-  template <class Comp>
   void lookup (const T *data, octave_idx_type nel,
                const T* values, octave_idx_type nvalues,
                octave_idx_type *idx, octave_idx_type offset, Comp comp);