view scripts/optimization/fminunc.m @ 17338:1c89599167a6

maint: End m-files with 1 blank line. Simplifies automated grammarchecking script. * scripts/@ftp/ascii.m, scripts/@ftp/binary.m, scripts/@ftp/cd.m, scripts/@ftp/close.m, scripts/@ftp/delete.m, scripts/@ftp/dir.m, scripts/@ftp/display.m, scripts/@ftp/ftp.m, scripts/@ftp/loadobj.m, scripts/@ftp/mget.m, scripts/@ftp/mkdir.m, scripts/@ftp/mput.m, scripts/@ftp/rename.m, scripts/@ftp/rmdir.m, scripts/@ftp/saveobj.m, scripts/audio/lin2mu.m, scripts/audio/loadaudio.m, scripts/audio/mu2lin.m, scripts/audio/record.m, scripts/audio/saveaudio.m, scripts/audio/setaudio.m, scripts/deprecated/__error_text__.m, scripts/deprecated/cut.m, scripts/deprecated/error_text.m, scripts/deprecated/isstr.m, scripts/deprecated/polyderiv.m, scripts/deprecated/studentize.m, scripts/deprecated/sylvester_matrix.m, scripts/general/bicubic.m, scripts/general/celldisp.m, scripts/general/colon.m, scripts/general/cplxpair.m, scripts/general/del2.m, scripts/general/display.m, scripts/general/isdir.m, scripts/general/isequaln.m, scripts/general/loadobj.m, scripts/general/private/__isequal__.m, scripts/general/private/__splinen__.m, scripts/general/profexplore.m, scripts/general/quadgk.m, scripts/general/randi.m, scripts/general/repmat.m, scripts/general/saveobj.m, scripts/geometry/delaunay.m, scripts/help/__unimplemented__.m, scripts/help/doc_cache_create.m, scripts/help/get_first_help_sentence.m, scripts/help/help.m, scripts/help/print_usage.m, scripts/help/private/__additional_help_message__.m, scripts/help/private/__strip_html_tags__.m, scripts/help/type.m, scripts/image/imfinfo.m, scripts/image/imformats.m, scripts/image/imread.m, scripts/image/imwrite.m, scripts/image/private/__imfinfo__.m, scripts/image/private/__imread__.m, scripts/image/private/__imwrite__.m, scripts/image/private/imageIO.m, scripts/image/private/imwrite_filename.m, scripts/image/private/ind2x.m, scripts/io/beep.m, scripts/io/strread.m, scripts/io/textread.m, scripts/io/textscan.m, scripts/linear-algebra/krylov.m, 
scripts/linear-algebra/subspace.m, scripts/miscellaneous/bug_report.m, scripts/miscellaneous/bunzip2.m, scripts/miscellaneous/cast.m, scripts/miscellaneous/copyfile.m, scripts/miscellaneous/debug.m, scripts/miscellaneous/dir.m, scripts/miscellaneous/dump_prefs.m, scripts/miscellaneous/error_ids.m, scripts/miscellaneous/fileattrib.m, scripts/miscellaneous/gunzip.m, scripts/miscellaneous/isdeployed.m, scripts/miscellaneous/ismac.m, scripts/miscellaneous/mex.m, scripts/miscellaneous/mexext.m, scripts/miscellaneous/mkoctfile.m, scripts/miscellaneous/movefile.m, scripts/miscellaneous/namelengthmax.m, scripts/miscellaneous/news.m, scripts/miscellaneous/pack.m, scripts/miscellaneous/perl.m, scripts/miscellaneous/private/display_info_file.m, scripts/miscellaneous/python.m, scripts/miscellaneous/rmappdata.m, scripts/miscellaneous/run.m, scripts/miscellaneous/tar.m, scripts/miscellaneous/tempname.m, scripts/miscellaneous/untar.m, scripts/miscellaneous/unzip.m, scripts/miscellaneous/what.m, scripts/miscellaneous/zip.m, scripts/optimization/fminunc.m, scripts/optimization/fsolve.m, scripts/optimization/fzero.m, scripts/optimization/glpk.m, scripts/optimization/optimget.m, scripts/optimization/optimset.m, scripts/optimization/qp.m, scripts/optimization/sqp.m, scripts/path/pathdef.m, scripts/pkg/pkg.m, scripts/pkg/private/build.m, scripts/pkg/private/describe.m, scripts/pkg/private/dirempty.m, scripts/pkg/private/get_forge_download.m, scripts/pkg/private/get_forge_pkg.m, scripts/pkg/private/get_unsatisfied_deps.m, scripts/pkg/private/install.m, scripts/pkg/private/is_architecture_dependent.m, scripts/pkg/private/list_forge_packages.m, scripts/pkg/private/rebuild.m, scripts/pkg/private/shell.m, scripts/pkg/private/uninstall.m, scripts/plot/axes.m, scripts/plot/box.m, scripts/plot/closereq.m, scripts/plot/diffuse.m, scripts/plot/ezpolar.m, scripts/plot/findfigs.m, scripts/plot/gco.m, scripts/plot/guidata.m, scripts/plot/guihandles.m, scripts/plot/hdl2struct.m, 
scripts/plot/linkprop.m, scripts/plot/peaks.m, scripts/plot/print.m, scripts/plot/private/__add_datasource__.m, scripts/plot/private/__axis_label__.m, scripts/plot/private/__clabel__.m, scripts/plot/private/__color_str_rgb__.m, scripts/plot/private/__contour__.m, scripts/plot/private/__default_plot_options__.m, scripts/plot/private/__errcomm__.m, scripts/plot/private/__file_filter__.m, scripts/plot/private/__fltk_file_filter__.m, scripts/plot/private/__getlegenddata__.m, scripts/plot/private/__gnuplot_open_stream__.m, scripts/plot/private/__gnuplot_print__.m, scripts/plot/private/__go_draw_axes__.m, scripts/plot/private/__interp_cube__.m, scripts/plot/private/__is_function__.m, scripts/plot/private/__line__.m, scripts/plot/private/__marching_cube__.m, scripts/plot/private/__next_line_style__.m, scripts/plot/private/__patch__.m, scripts/plot/private/__pie__.m, scripts/plot/private/__pltopt__.m, scripts/plot/private/__quiver__.m, scripts/plot/private/__scatter__.m, scripts/plot/private/__stem__.m, scripts/plot/private/__uigetdir_fltk__.m, scripts/plot/private/__uigetfile_fltk__.m, scripts/plot/private/__uiobject_split_args__.m, scripts/plot/private/__uiputfile_fltk__.m, scripts/plot/refresh.m, scripts/plot/saveas.m, scripts/plot/shg.m, scripts/plot/specular.m, scripts/plot/sphere.m, scripts/plot/struct2hdl.m, scripts/plot/subplot.m, scripts/plot/uicontextmenu.m, scripts/plot/uicontrol.m, scripts/plot/uipanel.m, scripts/plot/uipushtool.m, scripts/plot/uiresume.m, scripts/plot/uitoggletool.m, scripts/plot/uitoolbar.m, scripts/plot/uiwait.m, scripts/plot/waitforbuttonpress.m, scripts/polynomial/pchip.m, scripts/polynomial/polyeig.m, scripts/polynomial/ppval.m, scripts/prefs/addpref.m, scripts/prefs/getpref.m, scripts/prefs/ispref.m, scripts/prefs/private/loadprefs.m, scripts/prefs/private/prefsfile.m, scripts/prefs/private/saveprefs.m, scripts/prefs/setpref.m, scripts/set/private/validargs.m, scripts/set/unique.m, scripts/signal/arch_fit.m, scripts/signal/arch_rnd.m, 
scripts/signal/arch_test.m, scripts/signal/arma_rnd.m, scripts/signal/durbinlevinson.m, scripts/signal/fractdiff.m, scripts/signal/freqz.m, scripts/signal/freqz_plot.m, scripts/signal/hurst.m, scripts/signal/periodogram.m, scripts/signal/private/rectangle_lw.m, scripts/signal/private/rectangle_sw.m, scripts/signal/private/triangle_sw.m, scripts/signal/spectral_adf.m, scripts/signal/spectral_xdf.m, scripts/signal/stft.m, scripts/signal/synthesis.m, scripts/signal/yulewalker.m, scripts/sparse/colperm.m, scripts/sparse/eigs.m, scripts/sparse/etreeplot.m, scripts/sparse/gmres.m, scripts/sparse/private/__sprand_impl__.m, scripts/sparse/spdiags.m, scripts/sparse/sprandn.m, scripts/specfun/bessel.m, scripts/specfun/betaln.m, scripts/specfun/expint.m, scripts/special-matrix/gallery.m, scripts/startup/__finish__.m, scripts/statistics/base/qqplot.m, scripts/statistics/distributions/tcdf.m, scripts/statistics/distributions/wienrnd.m, scripts/statistics/models/logistic_regression.m, scripts/statistics/models/private/logistic_regression_derivatives.m, scripts/statistics/models/private/logistic_regression_likelihood.m, scripts/statistics/tests/anova.m, scripts/statistics/tests/bartlett_test.m, scripts/statistics/tests/chisquare_test_homogeneity.m, scripts/statistics/tests/chisquare_test_independence.m, scripts/statistics/tests/cor_test.m, scripts/statistics/tests/f_test_regression.m, scripts/statistics/tests/hotelling_test.m, scripts/statistics/tests/hotelling_test_2.m, scripts/statistics/tests/kolmogorov_smirnov_test_2.m, scripts/statistics/tests/kruskal_wallis_test.m, scripts/statistics/tests/manova.m, scripts/statistics/tests/mcnemar_test.m, scripts/statistics/tests/prop_test_2.m, scripts/statistics/tests/run_test.m, scripts/statistics/tests/sign_test.m, scripts/statistics/tests/t_test.m, scripts/statistics/tests/t_test_2.m, scripts/statistics/tests/t_test_regression.m, scripts/statistics/tests/u_test.m, scripts/statistics/tests/var_test.m, 
scripts/statistics/tests/welch_test.m, scripts/statistics/tests/wilcoxon_test.m, scripts/statistics/tests/z_test.m, scripts/statistics/tests/z_test_2.m, scripts/strings/strcat.m, scripts/strings/strjoin.m, scripts/strings/strsplit.m, scripts/testfun/__have_feature__.m, scripts/testfun/__printf_assert__.m, scripts/testfun/__prog_output_assert__.m, scripts/testfun/__run_test_suite__.m, scripts/time/clock.m, scripts/time/datenum.m, scripts/ui/errordlg.m, scripts/ui/private/message_dialog.m: End m-files with 1 blank line.
author Rik <>
date Wed, 28 Aug 2013 08:33:02 -0700
parents bc924baa2c4e
children d63878346099
line wrap: on
line source

## Copyright (C) 2008-2012 VZLU Prague, a.s.
## This file is part of Octave.
## Octave is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or (at
## your option) any later version.
## Octave is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
## You should have received a copy of the GNU General Public License
## along with Octave; see the file COPYING.  If not, see
## <http://www.gnu.org/licenses/>.
## Author: Jaroslav Hajek <>

## -*- texinfo -*-
## @deftypefn  {Function File} {} fminunc (@var{fcn}, @var{x0})
## @deftypefnx {Function File} {} fminunc (@var{fcn}, @var{x0}, @var{options})
## @deftypefnx {Function File} {[@var{x}, @var{fval}, @var{info}, @var{output}, @var{grad}, @var{hess}] =} fminunc (@var{fcn}, @dots{})
## Solve an unconstrained optimization problem defined by the function
## @var{fcn}.
## @var{fcn} should accept a vector (array) defining the unknown variables,
## and return the objective function value, optionally with gradient.
## In other words, this function attempts to determine a vector @var{x} such
## that @code{@var{fcn} (@var{x})} is a local minimum.
## @var{x0} determines a starting guess.  The shape of @var{x0} is preserved
## in all calls to @var{fcn}, but otherwise is treated as a column vector.
## @var{options} is a structure specifying additional options.
## Currently, @code{fminunc} recognizes these options:
## @qcode{"FunValCheck"}, @qcode{"OutputFcn"}, @qcode{"TolX"},
## @qcode{"TolFun"}, @qcode{"MaxIter"}, @qcode{"MaxFunEvals"},
## @qcode{"GradObj"}, @qcode{"FinDiffType"},
## @qcode{"TypicalX"}, @qcode{"AutoScaling"}.
## If @qcode{"GradObj"} is @qcode{"on"}, it specifies that @var{fcn},
## called with 2 output arguments, also returns the gradient of the
## objective function at the requested point.  @qcode{"TolX"} specifies
## the termination tolerance in the unknown variables, while
## @qcode{"TolFun"} is a tolerance for the objective function value.
## Default is @code{1e-7} for both @qcode{"TolX"} and @qcode{"TolFun"}.
## For description of the other options, see @code{optimset}.
## On return, @var{fval} contains the value of the function @var{fcn}
## evaluated at @var{x}, and @var{info} may be one of the following values:
## @table @asis
## @item 1
## Converged to a solution point.  Relative gradient error is less than
## specified by TolFun.
## @item 2
## Last relative step size was less than TolX.
## @item 3
## Last relative decrease in function value was less than TolFun.
## @item 0
## Iteration limit exceeded.
## @item -1
## The algorithm was terminated by the @qcode{"OutputFcn"}.
## @item -3
## The trust region radius became excessively small.
## @end table
## Optionally, fminunc can also yield a structure with convergence statistics
## (@var{output}), the output gradient (@var{grad}) and approximate Hessian
## (@var{hess}).
## Notes: If the objective is a function of a single variable then
## using @code{fminbnd} is usually a much better idea.  The algorithm used is a
## gradient search which depends on the objective function being differentiable.
## If the function has discontinuities it may be better to use a derivative-free
## algorithm such as @code{fminsearch}.
## @seealso{fminbnd, fminsearch, optimset}
## @end deftypefn

## PKG_ADD: ## Discard result to avoid polluting workspace with ans at startup.
## PKG_ADD: [~] = __all_opts__ ("fminunc");

function [x, fval, info, output, grad, hess] = fminunc (fcn, x0, options = struct ())

  ## Trust-region quasi-Newton minimizer.  The Hessian approximation is
  ## kept as a Cholesky-type factor HESR (hess = hesr'*hesr), updated with
  ## the damped BFGS formula; each step is computed by __doglegm__.

  ## Get default options if requested.
  if (nargin == 1 && ischar (fcn) && strcmp (fcn, 'defaults'))
    x = optimset ("MaxIter", 400, "MaxFunEvals", Inf,
                  "GradObj", "off", "TolX", 1e-7, "TolFun", 1e-7,
                  "OutputFcn", [], "FunValCheck", "off",
                  "FinDiffType", "central",
                  "TypicalX", [], "AutoScaling", "off");
    return;
  endif

  if (nargin < 2 || nargin > 3 || ! ismatrix (x0))
    print_usage ();
  endif

  if (ischar (fcn))
    fcn = str2func (fcn, "global");
  endif

  xsiz = size (x0);
  n = numel (x0);

  has_grad = strcmpi (optimget (options, "GradObj", "off"), "on");
  cdif = strcmpi (optimget (options, "FinDiffType", "central"), "central");
  maxiter = optimget (options, "MaxIter", 400);
  maxfev = optimget (options, "MaxFunEvals", Inf);
  outfcn = optimget (options, "OutputFcn");

  ## Get the scaling vector from the TypicalX option.  If the AutoScaling
  ## option is "on", the scaling is instead estimated inside the main loop
  ## from the current Hessian approximation.
  typicalx = optimget (options, "TypicalX");
  if (isempty (typicalx))
    typicalx = ones (n, 1);
  endif
  autoscale = strcmpi (optimget (options, "AutoScaling", "off"), "on");
  if (! autoscale)
    dg = 1 ./ typicalx;
  endif

  funvalchk = strcmpi (optimget (options, "FunValCheck", "off"), "on");

  if (funvalchk)
    ## Replace fcn with a guarded version.
    fcn = @(x) guarded_eval (fcn, x);
  endif

  ## These defaults are rather stringent. I think that normally, user
  ## prefers accuracy to performance.

  macheps = eps (class (x0));

  tolx = optimget (options, "TolX", 1e-7);
  tolf = optimget (options, "TolFun", 1e-7);

  factor = 0.1;
  ## FIXME: TypicalX corresponds to user scaling (???)
  autodg = true;

  niter = 1;
  nfev = 0;

  x = x0(:);
  info = 0;

  ## Initial evaluation.
  fval = fcn (reshape (x, xsiz));
  n = length (x);

  if (! isempty (outfcn))
    optimvalues.iter = niter;
    optimvalues.funccount = nfev;
    optimvalues.fval = fval;
    optimvalues.searchdirection = zeros (n, 1);
    state = 'init';
    stop = outfcn (x, optimvalues, state);
    if (stop)
      ## A nonzero INFO keeps the outer loop below from being entered.
      info = -1;
    endif
  endif

  nsuciter = 0;
  lastratio = 0;

  grad = [];

  ## Outer loop.
  while (niter < maxiter && nfev < maxfev && ! info)

    grad0 = grad;

    ## Calculate function value and gradient (possibly via FD).
    if (has_grad)
      [fval, grad] = fcn (reshape (x, xsiz));
      grad = grad(:);
      nfev ++;
    else
      grad = __fdjac__ (fcn, reshape (x, xsiz), fval, typicalx, cdif)(:);
      nfev += (1 + cdif) * length (x);
    endif

    if (niter == 1)
      ## Initialize by identity matrix.
      hesr = eye (n);
    else
      ## Use the damped BFGS formula.  S and W (= hesr*s) are the step and
      ## its image left over from the last pass through the inner loop.
      y = grad - grad0;
      sBs = sumsq (w);
      Bs = hesr'*w;
      sy = y'*s;
      theta = 0.8 / max (1 - sy / sBs, 0.8);
      r = theta * y + (1-theta) * Bs;
      hesr = cholupdate (hesr, r / sqrt (s'*r), "+");
      ## Use a dedicated flag for the downdate status: writing it into
      ## INFO would clobber the solver's exit code and make a failed
      ## downdate look like convergence.
      [hesr, cholerr] = cholupdate (hesr, Bs / sqrt (sBs), "-");
      if (cholerr)
        ## Downdate failed; restart from the identity and keep iterating.
        hesr = eye (n);
      endif
    endif

    if (autoscale)
      ## Second derivatives approximate the hessian.
      d2f = norm (hesr, 'columns').';
      if (niter == 1)
        dg = d2f;
      else
        ## FIXME: maybe fixed lower and upper bounds?
        dg = max (0.1*dg, d2f);
      endif
    endif

    if (niter == 1)
      xn = norm (dg .* x);
      ## FIXME: something better?
      delta = factor * max (xn, 1);
    endif

    ## FIXME -- why tolf*n*xn? If abs (e) ~ abs(x) * eps is a vector
    ## of perturbations of x, then norm (hesr*e) <= eps*xn, i.e. by
    ## tolf ~ eps we demand as much accuracy as we can expect.
    if (norm (grad) <= tolf*n*xn)
      info = 1;
      break;
    endif

    suc = false;
    decfac = 0.5;

    ## Inner loop.
    while (! suc && niter <= maxiter && nfev < maxfev && ! info)

      s = - __doglegm__ (hesr, grad, dg, delta);

      sn = norm (dg .* s);
      if (niter == 1)
        delta = min (delta, sn);
      endif

      fval1 = fcn (reshape (x + s, xsiz)) (:);
      nfev ++;

      if (fval1 < fval)
        ## Scaled actual reduction.
        actred =  (fval - fval1) / (abs (fval1) + abs (fval));
      else
        actred = -1;
      endif

      w = hesr*s;
      ## Scaled predicted reduction, and ratio.
      t = 1/2 * sumsq (w) + grad'*s;
      if (t < 0)
        prered = -t/(abs (fval) + abs (fval + t));
        ratio = actred / prered;
      else
        prered = 0;
        ratio = 0;
      endif

      ## Update delta.
      if (ratio < min (max (0.1, 0.8*lastratio), 0.9))
        ## Poor agreement with the model: shrink the trust region, more
        ## aggressively on each consecutive failure.
        delta *= decfac;
        decfac ^= 1.4142;
        if (delta <= 1e1*macheps*xn)
          ## Trust region became uselessly small.
          info = -3;
          break;
        endif
      else
        lastratio = ratio;
        decfac = 0.5;
        if (abs (1-ratio) <= 0.1)
          delta = 1.4142*sn;
        elseif (ratio >= 0.5)
          delta = max (delta, 1.4142*sn);
        endif
      endif

      if (ratio >= 1e-4)
        ## Successful iteration.
        x += s;
        xn = norm (dg .* x);
        fval = fval1;
        nsuciter ++;
        suc = true;
      endif

      niter ++;

      ## FIXME: should outputfcn be only called after a successful iteration?
      if (! isempty (outfcn))
        optimvalues.iter = niter;
        optimvalues.funccount = nfev;
        optimvalues.fval = fval;
        optimvalues.searchdirection = s;
        state = 'iter';
        stop = outfcn (x, optimvalues, state);
        if (stop)
          ## Leave at once so the tests below cannot overwrite INFO.
          info = -1;
          break;
        endif
      endif

      ## Tests for termination conditions. A mysterious place, anything
      ## can happen if you change something here...

      ## The rule of thumb (which I'm not sure M*b is quite following)
      ## is that for a tolerance that depends on scaling, only 0 makes
      ## sense as a default value. But 0 usually means uselessly long
      ## iterations, so we need scaling-independent tolerances wherever
      ## possible.

      ## The following tests done only after successful step.
      if (ratio >= 1e-4)
        ## This one is classic. Note that we use scaled variables again,
        ## but compare to scaled step, so nothing bad.
        if (sn <= tolx*xn)
          info = 2;
          ## Again a classic one.
        elseif (actred < tolf)
          info = 3;
        endif
      endif

    endwhile
  endwhile

  ## Restore original shapes.
  x = reshape (x, xsiz);

  output.iterations = niter;
  output.successful = nsuciter;
  output.funcCount = nfev;

  if (nargout > 5)
    hess = hesr'*hesr;
  endif

endfunction


## An assistant function that evaluates a function handle and checks for
## bad results.
function [fx, gx] = guarded_eval (fun, x)
  ## Evaluate FUN at X and raise an error if the result is unusable.
  ## The gradient GX is requested from FUN only when the caller asks for
  ## a second output; otherwise FUN is called once and GX is empty.
  if (nargout > 1)
    [fx, gx] = fun (x);
  else
    fx = fun (x);
    gx = [];
  endif

  ## Both outputs must be real; only the function value is checked for
  ## NaN and Inf.
  if (! (isreal (fx) && isreal (gx)))
    error ("fminunc:notreal", "fminunc: non-real value encountered");
  elseif (any (isnan (fx(:))))
    error ("fminunc:isnan", "fminunc: NaN value encountered");
  elseif (any (isinf (fx(:))))
    error ("fminunc:isinf", "fminunc: Inf value encountered");
  endif
endfunction

%!function f = __rosenb (x)
%!  n = length (x);
%!  f = sumsq (1 - x(1:n-1)) + 100 * sumsq (x(2:n) - x(1:n-1).^2);
%!endfunction
%!
%!test
%! [x, fval, info, out] = fminunc (@__rosenb, [5, -5]);
%! tol = 2e-5;
%! assert (info > 0);
%! assert (x, ones (1, 2), tol);
%! assert (fval, 0, tol);
%!
%!test
%! [x, fval, info, out] = fminunc (@__rosenb, zeros (1, 4));
%! tol = 2e-5;
%! assert (info > 0);
%! assert (x, ones (1, 4), tol);
%! assert (fval, 0, tol);
%!
%% Test FunValCheck works correctly
%!assert (fminunc (@(x) x^2, 1, optimset ("FunValCheck", "on")), 0, eps)
%!error <non-real value> fminunc (@(x) x + i, 1, optimset ("FunValCheck", "on"))
%!error <NaN value> fminunc (@(x) x + NaN, 1, optimset ("FunValCheck", "on"))
%!error <Inf value> fminunc (@(x) x + Inf, 1, optimset ("FunValCheck", "on"))

## Solve the double dogleg trust-region minimization problem:
## Minimize 1/2*norm(r*x)^2  subject to the constraint norm(d.*x) <= delta,
## x being a convex combination of the gauss-newton and scaled gradient.

## TODO: error checks
## TODO: handle singularity, or leave it up to mldivide?

function x = __doglegm__ (r, g, d, delta)
  ## R is the factor of the Hessian approximation (H = r'*r), G the
  ## gradient, D the vector of scaling factors and DELTA the trust-region
  ## radius.  The result X satisfies norm (d .* x) <= delta; the caller
  ## negates it to obtain the step.

  ## Get Gauss-Newton direction.
  b = r' \ g;
  x = r \ b;
  xn = norm (d .* x);
  if (xn > delta)
    ## GN is too big, get scaled gradient.
    s = g ./ d;
    sn = norm (s);
    if (sn > 0)
      ## Normalize and rescale.
      s = (s / sn) ./ d;
      ## Get the line minimizer in s direction.
      tn = norm (r*s);
      snm = (sn / tn) / tn;
      if (snm < delta)
        ## Get the dogleg path minimizer.
        bn = norm (b);
        dxn = delta/xn; snmd = snm/delta;
        t = (bn/sn) * (bn/xn) * snmd;
        t -= dxn * snmd^2 - sqrt ((t-dxn)^2 + (1-dxn^2)*(1-snmd^2));
        alpha = dxn*(1-snmd^2) / t;
      else
        ## The line minimizer already lies outside the region: take the
        ## pure scaled-gradient step, truncated to DELTA below.
        alpha = 0;
      endif
    else
      ## Zero scaled gradient: just shrink the Gauss-Newton step.
      alpha = delta / xn;
      snm = 0;
    endif
    ## Form the appropriate convex combination.
    x = alpha * x + ((1-alpha) * min (snm, delta)) * s;
  endif
endfunction