Mercurial > octave
changeset 31277:185799b2a566
tsearch.cc: Minor performance improvements
tsearch.cc: replaced if-else with conditional operator
in inline functions, replaced redeclarations of local
variables with one set of declarations and initializations,
added more comments.
For searching 1e3 points through 1e6 triangles, these minor
changes reduced 1.55-1.75 seconds to 1.35-1.45 seconds,
so roughly 15% faster. Still O(M*N) for M points and N
triangles, but acceptably fast for common use.
author | Arun Giridhar <arungiridhar@gmail.com> |
---|---|
date | Sat, 08 Oct 2022 13:14:02 -0400 |
parents | 773a361bc476 |
children | 076c8f94d7f4 |
files | libinterp/corefcn/tsearch.cc |
diffstat | 1 files changed, 35 insertions(+), 38 deletions(-) [+] |
line wrap: on
line diff
--- a/libinterp/corefcn/tsearch.cc Fri Oct 07 14:28:01 2022 -0400 +++ b/libinterp/corefcn/tsearch.cc Sat Oct 08 13:14:02 2022 -0400 @@ -39,18 +39,12 @@ inline double max (double a, double b, double c) { - if (a < b) - return (b < c ? c : b); - else - return (a < c ? c : a); + return (a > b) ? (a > c ? a : c) : (b > c ? b : c); } inline double min (double a, double b, double c) { - if (a > b) - return (b > c ? c : b); - else - return (a > c ? c : a); + return (a < b) ? (a < c ? a : c) : (b < c ? b : c); } #define REF(x,k,i) x(static_cast<octave_idx_type> (elem((k), (i))) - 1) @@ -102,39 +96,42 @@ const octave_idx_type np = xi.numel (); ColumnVector values (np); - double x0, y0, a11, a12, a21, a22, det; - x0 = y0 = 0.0; - a11 = a12 = a21 = a22 = 0.0; - det = 0.0; + double x0 = 0.0, y0 = 0.0; + double a11 = 0.0, a12 = 0.0, a21 = 0.0, a22 = 0.0, det = 0.0; + double xt = 0.0, yt = 0.0; + double dx1 = 0.0, dx2 = 0.0, c1 = 0.0, c2 = 0.0; - octave_idx_type k = nelem; // k is a counter of elements - for (octave_idx_type kp = 0; kp < np; kp++) + octave_idx_type k = nelem; // k is more than just an index variable. + + for (octave_idx_type kp = 0; kp < np; kp++) // for each point { - const double xt = xi(kp); - const double yt = yi(kp); + xt = xi (kp); + yt = yi (kp); - // check if last triangle contains the next point - if (k < nelem) + // Check if point (xt,yt) is in the triangle that was last examined. + // This is for inputs where points are in contiguous order, + // like when the points are sampled from a continuous path. + if (k < nelem) // This check will be false for the very first point. { - const double dx1 = xt - x0; - const double dx2 = yt - y0; - const double c1 = (a22 * dx1 - a21 * dx2) / det; - const double c2 = (-a12 * dx1 + a11 * dx2) / det; - if (c1 >= -eps && c2 >= -eps && (c1 + c2) <= (1 + eps)) + // If we are here, then x0, y0, det all exist from before. + dx1 = xt - x0; + dx2 = yt - y0; + c1 = (a22 * dx1 - a21 * dx2) / det; + c2 = (-a12 * dx1 + a11 * dx2) / det; + if (c1 >= -eps && c2 >= -eps && (c1 + c2) <= 1 + eps) { - values(kp) = double(k+1); + values (kp) = k+1; continue; } } - // it doesn't, so go through all elements + // The point is not in the same triangle, so go through all triangles. for (k = 0; k < nelem; k++) { - octave_quit (); - if (xt >= minx(k) && xt <= maxx(k) && yt >= miny(k) && yt <= maxy(k)) { - // element inside the minimum rectangle: examine it closely + // Point is inside the triangle's bounding rectangle: + // See if it's inside the triangle itself. x0 = REF (x, k, 0); y0 = REF (y, k, 0); a11 = REF (x, k, 1) - x0; @@ -144,22 +141,22 @@ det = a11 * a22 - a21 * a12; // solve the system - const double dx1 = xt - x0; - const double dx2 = yt - y0; - const double c1 = (a22 * dx1 - a21 * dx2) / det; - const double c2 = (-a12 * dx1 + a11 * dx2) / det; - if ((c1 >= -eps) && (c2 >= -eps) && ((c1 + c2) <= (1 + eps))) + dx1 = xt - x0; + dx2 = yt - y0; + c1 = (a22 * dx1 - a21 * dx2) / det; + c2 = (-a12 * dx1 + a11 * dx2) / det; + if (c1 >= -eps && c2 >= -eps && (c1 + c2) <= 1 + eps) { - values(kp) = double(k+1); + values (kp) = k+1; break; } - } //endif # examine this element closely - } //endfor # each element + } //end see if it's inside the triangle itself + } //end for each triangle if (k == nelem) - values(kp) = lo_ieee_nan_value (); + values (kp) = lo_ieee_nan_value (); - } //endfor # kp + } //end for each point return ovl (values); }