view scripts/testfun/speed.m @ 14363:f3d52523cde1

Use Octave coding conventions in all m-file %!test blocks * wavread.m, acosd.m, acot.m, acotd.m, acoth.m, acsc.m, acscd.m, acsch.m, asec.m, asecd.m, asech.m, asind.m, atand.m, cosd.m, cot.m, cotd.m, coth.m, csc.m, cscd.m, csch.m, sec.m, secd.m, sech.m, sind.m, tand.m, accumarray.m, accumdim.m, bitcmp.m, bitget.m, bitset.m, blkdiag.m, cart2pol.m, cart2sph.m, celldisp.m, chop.m, circshift.m, colon.m, common_size.m, cplxpair.m, cumtrapz.m, curl.m, dblquad.m, deal.m, divergence.m, flipdim.m, fliplr.m, flipud.m, genvarname.m, gradient.m, idivide.m, int2str.m, interp1.m, interp1q.m, interp2.m, interp3.m, interpft.m, interpn.m, isa.m, isdir.m, isequal.m, isequalwithequalnans.m, issquare.m, logspace.m, nargchk.m, narginchk.m, nargoutchk.m, nextpow2.m, nthargout.m, num2str.m, pol2cart.m, polyarea.m, postpad.m, prepad.m, profile.m, profshow.m, quadgk.m, quadv.m, randi.m, rat.m, repmat.m, rot90.m, rotdim.m, shift.m, shiftdim.m, sph2cart.m, structfun.m, trapz.m, triplequad.m, convhull.m, dsearch.m, dsearchn.m, griddata3.m, griddatan.m, rectint.m, tsearchn.m, __makeinfo__.m, doc.m, get_first_help_sentence.m, help.m, type.m, unimplemented.m, which.m, imread.m, imwrite.m, dlmwrite.m, fileread.m, is_valid_file_id.m, strread.m, textread.m, textscan.m, commutation_matrix.m, cond.m, condest.m, cross.m, duplication_matrix.m, expm.m, housh.m, isdefinite.m, ishermitian.m, issymmetric.m, logm.m, normest.m, null.m, onenormest.m, orth.m, planerot.m, qzhess.m, rank.m, rref.m, trace.m, vech.m, ans.m, bincoeff.m, bug_report.m, bzip2.m, comma.m, compare_versions.m, computer.m, edit.m, fileparts.m, fullfile.m, getfield.m, gzip.m, info.m, inputname.m, isappdata.m, isdeployed.m, ismac.m, ispc.m, isunix.m, list_primes.m, ls.m, mexext.m, namelengthmax.m, news.m, orderfields.m, paren.m, recycle.m, rmappdata.m, semicolon.m, setappdata.m, setfield.m, substruct.m, symvar.m, ver.m, version.m, warning_ids.m, xor.m, fminbnd.m, fsolve.m, fzero.m, lsqnonneg.m, optimset.m, pqpnonneg.m, sqp.m, matlabroot.m, 
__gnuplot_drawnow__.m, __plt_get_axis_arg__.m, ancestor.m, cla.m, clf.m, close.m, colorbar.m, colstyle.m, comet3.m, contourc.m, figure.m, gca.m, gcbf.m, gcbo.m, gcf.m, ginput.m, graphics_toolkit.m, gtext.m, hggroup.m, hist.m, hold.m, isfigure.m, ishghandle.m, ishold.m, isocolors.m, isonormals.m, isosurface.m, isprop.m, legend.m, line.m, loglog.m, loglogerr.m, meshgrid.m, ndgrid.m, newplot.m, orient.m, patch.m, plot3.m, plotyy.m, __print_parse_opts__.m, quiver3.m, refreshdata.m, ribbon.m, semilogx.m, semilogxerr.m, semilogy.m, stem.m, stem3.m, subplot.m, title.m, uigetfile.m, view.m, whitebg.m, compan.m, conv.m, deconv.m, mkpp.m, mpoles.m, pchip.m, poly.m, polyaffine.m, polyder.m, polyfit.m, polygcd.m, polyint.m, polyout.m, polyval.m, polyvalm.m, ppder.m, ppint.m, ppjumps.m, ppval.m, residue.m, roots.m, spline.m, intersect.m, ismember.m, powerset.m, setdiff.m, setxor.m, union.m, unique.m, autoreg_matrix.m, bartlett.m, blackman.m, detrend.m, fftconv.m, fftfilt.m, fftshift.m, freqz.m, hamming.m, hanning.m, ifftshift.m, sinc.m, sinetone.m, sinewave.m, unwrap.m, bicg.m, bicgstab.m, gmres.m, gplot.m, nonzeros.m, pcg.m, pcr.m, spaugment.m, spconvert.m, spdiags.m, speye.m, spfun.m, spones.m, sprand.m, sprandsym.m, spstats.m, spy.m, svds.m, treelayout.m, bessel.m, beta.m, betaln.m, factor.m, factorial.m, isprime.m, lcm.m, legendre.m, nchoosek.m, nthroot.m, perms.m, pow2.m, primes.m, reallog.m, realpow.m, realsqrt.m, hadamard.m, hankel.m, hilb.m, invhilb.m, magic.m, rosser.m, vander.m, __finish__.m, center.m, cloglog.m, corr.m, cov.m, gls.m, histc.m, iqr.m, kendall.m, kurtosis.m, logit.m, mahalanobis.m, mean.m, meansq.m, median.m, mode.m, moment.m, ols.m, ppplot.m, prctile.m, probit.m, quantile.m, range.m, ranks.m, run_count.m, runlength.m, skewness.m, spearman.m, statistics.m, std.m, table.m, var.m, zscore.m, betacdf.m, betainv.m, betapdf.m, betarnd.m, binocdf.m, binoinv.m, binopdf.m, binornd.m, cauchy_cdf.m, cauchy_inv.m, cauchy_pdf.m, cauchy_rnd.m, chi2cdf.m, chi2inv.m, 
chi2pdf.m, chi2rnd.m, discrete_cdf.m, discrete_inv.m, discrete_pdf.m, discrete_rnd.m, empirical_cdf.m, empirical_inv.m, empirical_pdf.m, empirical_rnd.m, expcdf.m, expinv.m, exppdf.m, exprnd.m, fcdf.m, finv.m, fpdf.m, frnd.m, gamcdf.m, gaminv.m, gampdf.m, gamrnd.m, geocdf.m, geoinv.m, geopdf.m, geornd.m, hygecdf.m, hygeinv.m, hygepdf.m, hygernd.m, kolmogorov_smirnov_cdf.m, laplace_cdf.m, laplace_inv.m, laplace_pdf.m, laplace_rnd.m, logistic_cdf.m, logistic_inv.m, logistic_pdf.m, logistic_rnd.m, logncdf.m, logninv.m, lognpdf.m, lognrnd.m, nbincdf.m, nbininv.m, nbinpdf.m, nbinrnd.m, normcdf.m, norminv.m, normpdf.m, normrnd.m, poisscdf.m, poissinv.m, poisspdf.m, poissrnd.m, stdnormal_cdf.m, stdnormal_inv.m, stdnormal_pdf.m, stdnormal_rnd.m, tcdf.m, tinv.m, tpdf.m, trnd.m, unidcdf.m, unidinv.m, unidpdf.m, unidrnd.m, unifcdf.m, unifinv.m, unifpdf.m, unifrnd.m, wblcdf.m, wblinv.m, wblpdf.m, wblrnd.m, kolmogorov_smirnov_test.m, kruskal_wallis_test.m, base2dec.m, bin2dec.m, blanks.m, cstrcat.m, deblank.m, dec2base.m, dec2bin.m, dec2hex.m, findstr.m, hex2dec.m, index.m, isletter.m, mat2str.m, rindex.m, str2num.m, strcat.m, strjust.m, strmatch.m, strsplit.m, strtok.m, strtrim.m, strtrunc.m, substr.m, validatestring.m, demo.m, example.m, fail.m, speed.m, addtodate.m, asctime.m, clock.m, ctime.m, date.m, datenum.m, datetick.m, datevec.m, eomday.m, etime.m, is_leap_year.m, now.m: Use Octave coding conventions in all m-file %!test blocks
author Rik <octave@nomad.inbox5.com>
date Mon, 13 Feb 2012 07:29:44 -0800
parents 11949c9795a0
children 5d3a684236b0
line wrap: on
line source

## Copyright (C) 2000-2012 Paul Kienzle
##
## This file is part of Octave.
##
## Octave is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or (at
## your option) any later version.
##
## Octave is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Octave; see the file COPYING.  If not, see
## <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn  {Function File} {} speed (@var{f}, @var{init}, @var{max_n}, @var{f2}, @var{tol})
## @deftypefnx {Function File} {[@var{order}, @var{n}, @var{T_f}, @var{T_f2}] =} speed (@dots{})
##
## Determine the execution time of an expression (@var{f}) for various input
## values (@var{n}).  The @var{n} are log-spaced from 1 to @var{max_n}.  For
## each @var{n}, an initialization expression (@var{init}) is computed to
## create any data needed for the test.  If a second expression (@var{f2}) is
## given then the execution times of the two expressions are compared.  When
## called without output arguments the results are printed to stdout and
## displayed graphically.
##
## @table @code
## @item @var{f}
## The code expression to evaluate.
##
## @item @var{max_n}
## The maximum test length to run.  The default value is 100.  Alternatively,
## use @code{[min_n, max_n]} or specify the @var{n} exactly with
## @code{[n1, n2, @dots{}, nk]}.
##
## @item @var{init}
## Initialization expression for function argument values.  Use @var{k}
## for the test number and @var{n} for the size of the test.  This should
## compute values for all variables used by @var{f}.  Note that @var{init} will
## be evaluated first for @math{k = 0}, so things which are constant throughout
## the test series can be computed once.  The default value is
## @code{@var{x} = randn (@var{n}, 1)}.
##
## @item @var{f2}
## An alternative expression to evaluate, so that the speed of two
## expressions can be directly compared.  The default is @code{[]}.
##
## @item @var{tol}
## Tolerance used to compare the results of expression @var{f} and expression
## @var{f2}.  If @var{tol} is positive, the tolerance is an absolute one.
## If @var{tol} is negative, the tolerance is a relative one.  The default is
## @code{eps}.  If @var{tol} is @code{Inf}, then no comparison will be made.
##
## @item @var{order}
## The time complexity of the expression @math{O(a*n^p)}.  This
## is a structure with fields @code{a} and @code{p}.
##
## @item @var{n}
## The values @var{n} for which the expression was calculated @strong{AND}
## the execution time was greater than zero.
##
## @item @var{T_f}
## The nonzero execution times recorded for the expression @var{f} in seconds.
##
## @item @var{T_f2}
## The nonzero execution times recorded for the expression @var{f2} in seconds.
## If required, the mean time ratio is simply @code{mean (T_f ./ T_f2)}.
##
## @end table
##
## The slope of the execution time graph shows the approximate
## power of the asymptotic running time @math{O(n^p)}.  This
## power is plotted for the region over which it is approximated
## (the latter half of the graph).  The estimated power is not
## very accurate, but should be sufficient to determine the
## general order of an algorithm.  It should indicate if, for
## example, the implementation is unexpectedly @math{O(n^2)}
## rather than @math{O(n)} because it extends a vector each
## time through the loop rather than pre-allocating storage.
## In the current version of Octave, the following is not the
## expected @math{O(n)}.
##
## @example
## speed ("for i = 1:n, y@{i@} = x(i); endfor", "", [1000, 10000])
## @end example
##
## @noindent
## But it is if you preallocate the cell array @code{y}:
##
## @example
## @group
## speed ("for i = 1:n, y@{i@} = x(i); endfor", ...
##        "x = rand (n, 1); y = cell (size (x));", [1000, 10000])
## @end group
## @end example
##
## An attempt is made to approximate the cost of individual
## operations, but it is wildly inaccurate.  You can improve the
## stability somewhat by doing more work for each @code{n}.  For
## example:
##
## @example
## speed ("airy(x)", "x = rand (n, 10)", [10000, 100000])
## @end example
##
## When comparing two different expressions (@var{f}, @var{f2}), the slope
## of the line on the speedup ratio graph should be larger than 1 if the new
## expression is faster.  Better algorithms have a shallow slope.  Generally,
## vectorizing an algorithm will not change the slope of the execution
## time graph, but will shift it relative to the original.  For
## example:
##
## @example
## @group
## speed ("sum (x)", "", [10000, 100000], ...
##        "v = 0; for i = 1:length (x), v += x(i); endfor")
## @end group
## @end example
##
## The following is a more complex example.  If there was an original version
## of @code{xcorr} using for loops and a second version using an FFT, then
## one could compare the run speed for various lags, or for a fixed lag with
## varying vector lengths, as follows:
##
## @example
## @group
## speed ("xcorr (x, n)", "x = rand (128, 1);", 100,
##        "xcorr_orig (x, n)", -100*eps)
## speed ("xcorr (x, 15)", "x = rand (20+n, 1);", 100,
##        "xcorr_orig (x, 15)", -100*eps)
## @end group
## @end example
##
## Assuming one of the two versions is in xcorr_orig, this
## would compare their speed and their output values.  Note that the
## FFT version is not exact, so one must specify an acceptable tolerance on
## the comparison @code{100*eps}.  In this case, the comparison should be
## computed relatively, as @code{abs ((@var{x} - @var{y}) ./ @var{y})} rather
## than absolutely as @code{abs (@var{x} - @var{y})}.
##
## Type @kbd{example ("speed")} to see some real examples or 
## @kbd{demo ("speed")} to run them.
## @end deftypefn

## FIXME: consider two dimensional speedup surfaces for functions like kron.
function [__order, __test_n, __tnew, __torig] = speed (__f1, __init, __max_n = 100, __f2 = "", __tol = eps)

  ## Implementation note: the user-supplied code fragments (__init, __f1 and
  ## __f2) are run with eval in THIS function's workspace.  Every variable
  ## that has to survive those evals is therefore prefixed with "__" so that
  ## user code is unlikely to overwrite it.  Only "n" (test size) and "k"
  ## (test number) are deliberately exposed under plain names.

  ## The signature accepts between one and five input arguments.
  if (nargin < 1 || nargin > 5)
    print_usage ();
  endif

  if (nargin < 2 || isempty (__init))
    __init = "x = randn (n, 1)";
  endif

  if (isempty (__max_n))
    __max_n = 100;
  endif

  ## Number of log-spaced test sizes generated when only a range is given.
  __numtests = 15;

  ## Let user specify range of n.
  if (isscalar (__max_n))
    ## Scalar: test sizes run from 1 to max_n.
    __min_n = 1;
    assert (__max_n > __min_n);
    __test_n = logspace (0, log10 (__max_n), __numtests);
  elseif (length (__max_n) == 2)
    ## Two elements: interpreted as [min_n, max_n].
    [__min_n, __max_n] = deal (__max_n(1), __max_n(2));
    assert (__min_n >= 1);
    assert (__max_n > __min_n);
    __test_n = logspace (log10 (__min_n), log10 (__max_n), __numtests);
  else
    ## Anything else: the caller listed the test sizes explicitly.
    assert (all (__max_n > 0));
    __test_n = __max_n;
  endif
  ## Force n to be an integer (rounding may create duplicates, hence unique).
  __test_n = unique (round (__test_n));
  assert (__test_n >= 1);

  __torig = __tnew = zeros (size (__test_n));

  ## Print and plot the data if no output is requested.
  ## This flag is read again after the user code has been evaluated, so it
  ## carries the "__" prefix like every other variable that must survive eval.
  __do_display = (nargout == 0);

  if (__do_display)
    disp (cstrcat ("testing ", __f1, "\ninit: ", __init));
  endif

  ## Add semicolon closure to all code fragments in case user has not done so.
  __init = cstrcat (__init, ";");
  __f1 = cstrcat (__f1, ";");
  if (! isempty (__f2))
    __f2 = cstrcat (__f2, ";");
  endif

  ## Make sure the functions are freshly loaded by evaluating them at
  ## test_n(1); first have to initialize the args though.
  ## Note that init is evaluated here with k = 0, before the test series.
  n = 1;
  k = 0;
  eval (__init);
  eval (__f1);
  if (! isempty (__f2))
    eval (__f2);
  endif

  ## Run the tests.
  for k = 1:length (__test_n)
    n = __test_n(k);
    eval (__init);

    if (__do_display)
      printf ("n%i = %i  ", k, n);
      fflush (stdout);
    endif

    ## Time the first expression.  The timer calls are part of the evaluated
    ## string so that only the expression itself is measured.
    ## NOTE: __v1 is taken from "ans"; an expression that does not itself
    ## set "ans" leaves whatever value "ans" held beforehand.
    eval (cstrcat ("__t = time();", __f1, "__v1=ans; __t = time()-__t;"));
    if (__t < 0.25)
      ## Short runs are noisy: repeat twice more and keep the best time.
      eval (cstrcat ("__t2 = time();", __f1, "__t2 = time()-__t2;"));
      eval (cstrcat ("__t3 = time();", __f1, "__t3 = time()-__t3;"));
      __t = min ([__t, __t2, __t3]);
    endif
    __tnew(k) = __t;

    if (! isempty (__f2))
      ## Same procedure for the alternative expression.
      eval (cstrcat ("__t = time();", __f2, "__v2=ans; __t = time()-__t;"));
      if (__t < 0.25)
        eval (cstrcat ("__t2 = time();", __f2, "__t2 = time()-__t2;"));
        eval (cstrcat ("__t3 = time();", __f2, "__t3 = time()-__t3;"));
        __t = min ([__t, __t2, __t3]);
      endif
      __torig(k) = __t;
      ## A tolerance of Inf disables the comparison of the two results.
      if (! isinf (__tol))
        assert (__v1, __v2, __tol);
      endif
    endif

  endfor

  ## Drop times of zero.
  ## Without a second expression __torig is left untouched (all zeros).
  if (isempty (__f2))
    zidx = (__tnew < 100*eps);
    __test_n(zidx) = [];
    __tnew(zidx) = [];
  else
    zidx = (__tnew < 100*eps | __torig < 100*eps);
    __test_n(zidx) = [];
    __tnew(zidx) = [];
    __torig(zidx) = [];
  endif

  if (isempty (__test_n))
    error (["speed: All running times were zero.\n",
            "error: speed: Choose larger MAX_N or do more work per function evaluation"]);
  endif

  ## Approximate time complexity and return it if requested.
  ## The fit is a straight line in log-log space over the latter half of the
  ## remaining test points: log (t) = p(1) * log (n) + p(2).
  tailidx = ceil (length (__test_n)/2):length (__test_n);
  p = polyfit (log (__test_n(tailidx)), log (__tnew(tailidx)), 1);
  if (nargout > 0)
    __order.p = p(1);
    __order.a = exp (p(2));
  endif

  if (__do_display)
    figure;
    ## Strip semicolon added to code fragments before displaying
    __init(end) = "";
    __f1(end) = "";
    if (! isempty (__f2))
      __f2(end) = "";
    endif
  endif

  if (__do_display && isempty (__f2))

    loglog (__test_n, __tnew*1000, "*-g;execution time;");
    xlabel ("test length");
    ylabel ("best execution time (ms)");
    title ({__f1, cstrcat("init: ", __init)});

  elseif (__do_display)

    ## Semicolons delimit the legend key inside a plot format string, so any
    ## semicolon inside the expressions is replaced by a period for display.
    subplot (1, 2, 1);
    semilogx (__test_n, __torig./__tnew,
              cstrcat ("-*r;", strrep (__f1, ";", "."), " / ",
                       strrep (__f2, ";", "."), ";"),
               __test_n, __tnew./__torig,
              cstrcat ("-*g;", strrep (__f2, ";", "."), " / ",
                       strrep (__f1, ";", "."), ";"));
    title ("Speedup Ratio");
    xlabel ("test length");
    ylabel ("speedup ratio");

    subplot (1, 2, 2);
    loglog (__test_n, __tnew*1000,
            cstrcat ("*-g;", strrep (__f1, ";", "."), ";"),
            __test_n, __torig*1000,
            cstrcat ("*-r;", strrep (__f2,";","."), ";"));
    title ({"Execution Times", cstrcat("init: ", __init)});
    xlabel ("test length");
    ylabel ("best execution time (ms)");

    ratio = mean (__torig ./ __tnew);
    printf ("\n\nMean runtime ratio = %.3g for '%s' vs '%s'\n",
            ratio, __f2, __f1);

  endif

  if (__do_display)

    ## Plot time complexity approximation (using milliseconds).
    figure;   # Open second plot window

    ## Round the fitted exponent to one decimal place for display.
    order = round (10*p(1))/10;
    if (order >= 0.1)
      order = sprintf ("O(n^%g)", order);
    else
      order = "O(1)";
    endif
    v = polyval (p, log (__test_n(tailidx)));

    loglog (__test_n(tailidx), exp(v)*1000, sprintf ("b;%s;", order));
    title ({"Time Complexity", __f1});
    xlabel ("test length");

    ## Get base time to 1 digit of accuracy.
    dt = exp (p(2));
    dt = floor (dt/10^floor(log10(dt)))*10^floor(log10(dt));
    ## Pick a unit based on the magnitude of dt.  The result is stored in
    ## "timestr" so that the local does not shadow the time() function.
    if (log10 (dt) >= -0.5)
      timestr = sprintf ("%g s", dt);
    elseif (log10 (dt) >= -3.5)
      timestr = sprintf ("%g ms", dt*1e3);
    elseif (log10 (dt) >= -6.5)
      timestr = sprintf ("%g us", dt*1e6);
    else
      timestr = sprintf ("%g ns", dt*1e9);
    endif

    ## Display nicely formatted complexity.
    printf ("\nFor %s:\n", __f1);
    printf ("  asymptotic power: %s\n", order);
    printf ("  approximate time per operation: %s\n", timestr);

  endif

endfunction


%% FIXME: Demos with declared functions do not work.  See bug #31815.
%%        A workaround has been hacked by not declaring the functions
%%        but using eval to create them in the proper context.
%%        Unfortunately, we can't remove them from the user's workspace
%%        because of another bug (#34497).
%!demo
%! ## Compare growing a vector on the fly against preallocating it.
%! rule = "-----------------------";
%! src_prealloc = cstrcat (
%!   "function x = build (n)\n",
%!   "  ## preallocate the target vector\n",
%!   "  x = zeros (1, n*100);\n",
%!   "  for i=0:n-1, x([1:100]+i*100) = 1:100; endfor\n",
%!   "endfunction");
%! src_growing = cstrcat (
%!   "function x = build_orig (n)\n",
%!   "  ## extend the target vector on the fly\n",
%!   "  for i=0:n-1, x([1:100]+i*100) = 1:100; endfor\n",
%!   "endfunction");
%!
%! ## Show the source of both candidates, original version first.
%! disp (rule);
%! disp (src_growing);
%! disp (rule);
%! disp (src_prealloc);
%! disp (rule);
%!
%! ## Define both functions in the current context by evaluating their source
%! eval (src_growing);
%! eval (src_prealloc);
%!
%! disp ("Preallocated vector test.\nThis takes a little while...");
%! speed ("build (n)", "", 1000, "build_orig (n)");
%! clear -f build build_orig
%! disp (rule);
%! disp ("Note how much faster it is to pre-allocate a vector.");
%! disp ("Notice the peak speedup ratio.");

%!demo
%! ## Compare a for-loop implementation against a fully vectorized one.
%! rule = "-----------------------";
%! src_vectorized = cstrcat (
%!   "function x = build (n)\n",
%!   "  idx = [1:100]';\n",
%!   "  x = idx(:,ones(1,n));\n",
%!   "  x = reshape (x, 1, n*100);\n",
%!   "endfunction");
%! src_looped = cstrcat (
%!   "function x = build_orig (n)\n",
%!   "  for i=0:n-1, x([1:100]+i*100) = 1:100; endfor\n",
%!   "endfunction");
%!
%! ## Show the source of both candidates, original version first.
%! disp (rule);
%! disp (src_looped);
%! disp (rule);
%! disp (src_vectorized);
%! disp (rule);
%!
%! ## Define both functions in the current context by evaluating their source
%! eval (src_looped);
%! eval (src_vectorized);
%!
%! disp ("Vectorized test.\nThis takes a little while...");
%! speed ("build (n)", "", 1000, "build_orig (n)");
%! clear -f build build_orig
%! disp (rule);
%! disp ("This time, the for loop is done away with entirely.");
%! disp ("Notice how much bigger the speedup is than in example 1.");

%!test
%! ## Single-expression call: check the type and shape of every output.
%! [cplx, sizes, t_new, t_orig] = speed ("airy (x)", "x = rand (n, 10)", [100, 1000]);
%! ## Complexity estimate is a scalar struct with fields p and a.
%! assert (isstruct (cplx));
%! assert (size (cplx), [1, 1]);
%! assert (fieldnames (cplx), {"p"; "a"});
%! ## Test sizes and timings are numeric and of matching size.
%! assert (isnumeric (sizes));
%! assert (numel (sizes) > 10);
%! assert (isnumeric (t_new));
%! assert (size (t_new), size (sizes));
%! assert (isnumeric (t_orig));
%! assert (numel (t_orig) > 10);

%% This test is known to fail on operating systems with low resolution timers
%% such as MinGW
%!xtest
%! ## Two-expression call: check the type and shape of every output.
%! vectorized = "sum (x)";
%! looped = "v = 0; for i = 1:length (x), v += x(i); endfor";
%! [cplx, sizes, t_new, t_orig] = speed (vectorized, "", [100, 1000], looped);
%! ## Complexity estimate is a scalar struct with fields p and a.
%! assert (isstruct (cplx));
%! assert (size (cplx), [1, 1]);
%! assert (fieldnames (cplx), {"p"; "a"});
%! ## Test sizes and timings are numeric and of matching size.
%! assert (isnumeric (sizes));
%! assert (numel (sizes) > 10);
%! assert (isnumeric (t_new));
%! assert (size (t_new), size (sizes));
%! assert (isnumeric (t_orig));
%! assert (numel (t_orig) > 10);

%% Test input validation
%% Calling speed with no arguments, or with more arguments than its
%% five-parameter signature accepts, must raise an error.
%!error speed ()
%!error speed (1, 2, 3, 4, 5, 6, 7)