changeset 6878:0b351dd86cf5 octave-forge
improvements to convergence checks; fixes a potential spurious failure message
author    mcreel
date      Wed, 17 Mar 2010 10:11:31 +0000
parents   93c26c5f7d42
children  d2d3fbcb2142
files     main/optim/src/__bfgsmin.cc
diffstat  1 files changed, 56 insertions(+), 50 deletions(-)
--- a/main/optim/src/__bfgsmin.cc	Tue Mar 16 17:31:08 2010 +0000
+++ b/main/optim/src/__bfgsmin.cc	Wed Mar 17 10:11:31 2010 +0000
@@ -1,4 +1,4 @@
-// Copyright (C) 2004,2005,2006,2007 Michael Creel <michael.creel@uab.es>
+// Copyright (C) 2004,2005,2006,2007,2010 Michael Creel <michael.creel@uab.es>
 //
 // This program is free software; you can redistribute it and/or modify
 // it under the terms of the GNU General Public License as published by
@@ -28,9 +28,7 @@
 #include <octave/Cell.h>
 #include <float.h>
 #include "error.h"
-#ifdef __MINGW32__
-#define isnan _isnan
-#endif
+
 
 int __bfgsmin_obj(double &obj, const std::string f, const octave_value_list f_args, const ColumnVector theta, const int minarg)
 {
@@ -164,13 +162,13 @@
 	ColumnVector trial;
 	// initial values
 	best_obj = obj;
-	improvement_0 = 0.0;
+	improvement_0 = 0.0;
 	step = 1.0;
 	trial = x + step*dx;
 	__bfgsmin_obj(obj, f, f_args, trial, minarg);
 	if (verbose) printf("bisectionstep: trial step: %g obj value: %g\n", step, obj);
 	// this first loop goes until an improvement is found
-	while (obj >= best_obj) {
+	while (obj > best_obj) {
 		if (step < 2.0*DBL_EPSILON) {
 			if (verbose) warning("bisectionstep: unable to find improvement, setting step to zero");
 			step = 0.0;
@@ -212,7 +210,7 @@
 	double obj_0, obj_left, obj_right, delta, inv_delta_sq, gradient, hessian;
 	int found_improvement = 0;
 	obj_0 = obj;
-	delta = 0.001; // experimentation show that this is a good choice
+	delta = 0.001; // experimentation shows that this is a good choice
 	inv_delta_sq = 1.0 / (delta*delta);
 	ColumnVector x_right = x + delta*dx;
 	ColumnVector x_left = x - delta*dx;
@@ -225,19 +223,13 @@
 	hessian = fabs(hessian); // ensures we're going in a decreasing direction
 	if (hessian < 2.0*DBL_EPSILON) hessian = 1.0; // avoid div by zero
 	step = - gradient / hessian; // hessian inverse gradient: the Newton step
-	if (step < 0.0) { // step should be positive. If it is not, go to bisection step
-		if (verbose) warning("__stepsize: no improvement with Newton step, falling back to bisection");
-		obj = obj_0;
-		found_improvement = __bisectionstep(step, obj, f, f_args, x, dx, minarg, verbose);
-		return 0;
-	}
 	step = (step < 1.0)*step + 1.0*(step >= 1.0); // maximum stepsize is 1.0 - conservative
 	// ensure that this is improvement, and if not, fall back to bisection
 	__bfgsmin_obj(obj, f, f_args, x + step*dx, minarg);
 	if (verbose) printf("newtonstep: trial step: %g obj value: %g\n", step, obj);
-	if (obj >= obj_0) {
+	if (obj > obj_0) {
 		obj = obj_0;
-		if (verbose) warning("__stepsize: no improvement with Newton step, falling back to bisection");
+		if (verbose) warning("__stepsize: no improvement with Newton step, falling back to bisection");
 		found_improvement = __bisectionstep(step, obj, f, f_args, x, dx, minarg, verbose);
 	} else found_improvement = 1;
@@ -260,7 +252,7 @@
 		gradient_ok, i, j, k, conv_fun, conv_param, conv_grad, have_gradient, \
 		try_gradient, warnings;
 	double func_tol, param_tol, gradient_tol, stepsize, obj_value, obj_in, \
-		last_obj_value, denominator, test;
+		last_obj_value, obj_value2, denominator, test;
 	Matrix H, H1, H2;
 	ColumnVector thetain, d, g, g_new, p, q, sig, gam;
@@ -308,8 +300,8 @@
 	// Initial gradient (try analytic, and use it if it's close enough to numeric)
 	__bfgsmin_gradient(g, f, f_args, theta, minarg, 1, have_gradient); // try analytic
-	if (warnings) printf("function claims to provide analytic gradient\n");
-	if (have_gradient) { // check equality if analytic available
+	if (have_gradient) { // check equality if analytic available
+		if (warnings) printf("function claims to provide analytic gradient\n");
 		have_gradient = 0; // force numeric
 		__bfgsmin_gradient(g_new, f, f_args, theta, minarg, 0, have_gradient);
 		p = g - g_new;
@@ -323,16 +315,42 @@
 	// MAIN LOOP STARTS HERE
 	for (iter = 0; iter < max_iters; iter++) {
 		obj_in = obj_value;
-		// make sure the messages aren't stale
-		conv_fun = -1;
-		conv_param = -1;
-		conv_grad = -1;
 		if(memory > 0) { // lbfgs
 			if (iter < memory) d = lbfgs_recursion(iter, sigmas, gammas, g);
 			else d = lbfgs_recursion(memory, sigmas, gammas, g);
 			d = -d;
 		} else d = -H*g; // ordinary bfgs
+		// convergence tests
+		conv_fun = 0;
+		conv_param = 0;
+		conv_grad = 0;
+		// function convergence
+		__bfgsmin_obj(obj_value, f, f_args, theta+d, minarg);
+		if (fabs(last_obj_value) > 1.0) {
+			conv_fun = (fabs(obj_value - last_obj_value)/fabs(last_obj_value)) < func_tol;
+		}
+		else {
+			conv_fun = fabs(obj_value - last_obj_value) < func_tol;
+		}
+		// parameter change convergence
+		test = sqrt(theta.transpose() * theta);
+		if (test > 1.0) conv_param = sqrt(d.transpose() * d) / test < param_tol ;
+		else conv_param = sqrt(d.transpose() * d) < param_tol; // Want intermediate results?
+		// gradient convergence
+		conv_grad = sqrt(g.transpose() * g) < gradient_tol;
+		// Are we done?
+		if (criterion == 1) {
+			if (conv_fun && conv_param && conv_grad) {
+				convergence = 1;
+				break;
+			}
+		}
+		else if (conv_fun) {
+			convergence = 1;
+			break;
+		}
+		// if not done, then take a step
 		// stepsize: try (l)bfgs direction, then steepest descent if it fails
 		f_args(minarg - 1) = theta;
 		__newtonstep(stepsize, obj_value, f, f_args, theta, d, minarg, warnings);
@@ -352,43 +370,31 @@
 			}
 		}
 		p = stepsize*d;
-		// check normal convergence: all 3 must be satisfied
-		// function convergence
-		if (fabs(last_obj_value) > 1.0) {
-			conv_fun = (fabs(obj_value - last_obj_value)/fabs(last_obj_value)) < func_tol;
-		}
-		else {
-			conv_fun = fabs(obj_value - last_obj_value) < func_tol;
-		}
-		// parameter change convergence
-		test = sqrt(theta.transpose() * theta);
-		if (test > 1.0) conv_param = sqrt(p.transpose() * p) / test < param_tol ;
-		else conv_param = sqrt(p.transpose() * p) < param_tol; // Want intermediate results?
-		// gradient convergence
-		conv_grad = sqrt(g.transpose() * g) < gradient_tol;
-		// Want intermediate results?
+		// Want intermediate results?
 		if (verbosity > 1) {
 			printf("------------------------------------------------\n");
 			printf("bfgsmin iteration %d convergence (f g p): %d %d %d\n", iter, conv_fun, conv_grad, conv_param);
 			if (warnings) {
 				if (memory > 0) printf("Using LBFGS, memory is last %d iterations\n",memory);
 			}
-			printf("\nfunction value: %g stepsize: %g \n\n", obj_value, stepsize);
+			printf("\nfunction value: %g stepsize: %g \n\n", last_obj_value, stepsize);
 			if (have_gradient) printf("used analytic gradient\n");
 			else printf("used numeric gradient\n");
 			for (j = 0; j<k; j++) printf("%15.5f %15.5f %15.5f\n",theta(j), g(j), p(j));
 		}
-		// Are we done?
-		if (criterion == 1) {
-			if (conv_fun && conv_param && conv_grad) {
-				convergence = 1;
-				break;
-			}
-		}
-		else if (conv_fun) {
-			convergence = 1;
-			break;
-		}
+		//--------------------------------------------------
+		// // Are we done?
+		// if (criterion == 1) {
+		// 	if (conv_fun && conv_param && conv_grad) {
+		// 		convergence = 1;
+		// 		break;
+		// 	}
+		// }
+		// else if (conv_fun) {
+		// 	convergence = 1;
+		// 	break;
+		// }
+		//--------------------------------------------------
 		last_obj_value = obj_value;
 		theta = theta + p;
 		// new gradient
@@ -456,7 +462,7 @@
 		if (have_gradient) printf("used analytic gradient\n");
 		else printf("used numeric gradient\n");
 		printf(" param gradient (n) change\n");
-		for (j = 0; j<k; j++) printf("%15.5f %15.5f %15.5f\n",theta(j), g(j), p(j));
+		for (j = 0; j<k; j++) printf("%15.5f %15.5f %15.5f\n",theta(j), g(j), d(j));
 	}
 	f_return(3) = iter;
 	f_return(2) = convergence;
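Note on the main-loop hunks: the three convergence tests now run at the top of the loop, against the full (l)bfgs direction d and the objective at theta+d, before any step-size search. What follows is a minimal standalone sketch of that reordered logic, not the actual __bfgsmin.cc code: std::vector<double> stands in for Octave's ColumnVector, and the objective callback is a hypothetical stand-in for __bfgsmin_obj.

#include <cmath>
#include <functional>
#include <vector>

// Euclidean norm, replacing sqrt(v.transpose() * v) on a ColumnVector.
static double norm(const std::vector<double>& v) {
	double s = 0.0;
	for (double x : v) s += x * x;
	return std::sqrt(s);
}

// Returns true when the tests say we are done. criterion == 1 demands all
// three tests pass, as in the patched loop; otherwise the function-change
// test alone decides. obj_value returns the objective at the trial point.
bool converged(const std::function<double(const std::vector<double>&)>& objective,
               const std::vector<double>& theta,  // current parameters
               const std::vector<double>& d,      // full (l)bfgs direction
               const std::vector<double>& g,      // current gradient
               double last_obj_value,
               double func_tol, double param_tol, double gradient_tol,
               int criterion, double& obj_value) {
	// function convergence: relative change if |f| > 1, absolute otherwise
	std::vector<double> trial(theta);
	for (size_t i = 0; i < trial.size(); ++i) trial[i] += d[i];
	obj_value = objective(trial);
	double change = std::fabs(obj_value - last_obj_value);
	bool conv_fun = (std::fabs(last_obj_value) > 1.0)
		? (change / std::fabs(last_obj_value)) < func_tol
		: change < func_tol;
	// parameter change convergence: relative if ||theta|| > 1
	double ntheta = norm(theta);
	bool conv_param = (ntheta > 1.0) ? (norm(d) / ntheta) < param_tol
	                                 : norm(d) < param_tol;
	// gradient convergence
	bool conv_grad = norm(g) < gradient_tol;
	if (criterion == 1) return conv_fun && conv_param && conv_grad;
	return conv_fun;
}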
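The ">=" to ">" changes in __bisectionstep and __newtonstep are what remove the potentially spurious failure message: a trial objective that merely ties the incumbent no longer counts as a failure, so on a flat stretch the step is not halved below 2*DBL_EPSILON and the "unable to find improvement" warning fires only when the objective genuinely cannot be decreased. A sketch of the first bisection loop under the same stand-in types, assuming the usual step halving (the halving itself sits between the hunks shown); objective_at is a hypothetical callback evaluating f(x + step*dx):

#include <cfloat>      // DBL_EPSILON
#include <functional>

// First loop of the bisection line search after the fix.
double bisection_step(const std::function<double(double)>& objective_at,
                      double best_obj) {
	double step = 1.0;
	double obj = objective_at(step);
	// halve until the trial no longer worsens the objective; with ">"
	// (was ">="), a tie with best_obj exits the loop instead of driving
	// step below 2*DBL_EPSILON and raising the warning
	while (obj > best_obj) {
		if (step < 2.0*DBL_EPSILON) return 0.0; // genuine failure: step set to zero
		step *= 0.5;
		obj = objective_at(step);
	}
	return step;
}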
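For context on the __newtonstep hunks: the removed early return for step < 0.0 is no longer needed, because a non-improving trial (including one from a negative step) now falls through to the obj > obj_0 check and triggers the same bisection fallback. The step itself is a one-dimensional Newton step along the search direction; here is a sketch assuming central finite differences for the directional derivatives (the exact difference formulas lie between the hunks shown and are not part of this diff), with phi a hypothetical callback evaluating f(x + s*dx):

#include <cfloat>
#include <cmath>
#include <functional>

// Newton choice of step size along a fixed direction; delta = 0.001
// follows the source comment.
double newton_step_size(const std::function<double(double)>& phi) {
	const double delta = 0.001;
	const double inv_delta_sq = 1.0 / (delta*delta);
	double obj_0 = phi(0.0);
	double obj_right = phi(delta);
	double obj_left = phi(-delta);
	double gradient = (obj_right - obj_left) / (2.0*delta);             // phi'(0)
	double hessian = inv_delta_sq * (obj_right - 2.0*obj_0 + obj_left); // phi''(0)
	hessian = std::fabs(hessian);                 // ensure a decreasing direction
	if (hessian < 2.0*DBL_EPSILON) hessian = 1.0; // avoid division by zero
	double step = -gradient / hessian;            // the Newton step
	return (step < 1.0) ? step : 1.0;             // maximum stepsize is 1.0 - conservative
}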