changeset 10201:5c66978f3fdf

support TypicalX and AutoScaling in fsolve/fminunc, don't autoscale by default
author Jaroslav Hajek <highegg@gmail.com>
date Tue, 26 Jan 2010 11:55:18 +0100
parents 7c1b1c084af1
children f0e0775a2503
files scripts/ChangeLog scripts/optimization/fminunc.m scripts/optimization/fsolve.m scripts/optimization/private/__fdjac__.m
diffstat 4 files changed, 85 insertions(+), 35 deletions(-) [+]
line wrap: on
line diff
--- a/scripts/ChangeLog	Tue Jan 26 09:48:07 2010 +0100
+++ b/scripts/ChangeLog	Tue Jan 26 11:55:18 2010 +0100
@@ -1,3 +1,11 @@
+2010-01-26  Jaroslav Hajek  <highegg@gmail.com>
+
+	* optimization/fsolve.m: Support TypicalX, autoscale only if
+	AutoScaling is on, off by default. Fix default tolerances.
+	* optimization/fminunc.m: Support TypicalX, autoscale only if
+	AutoScaling is on, off by default Fix default tolerances..
+	* optimization/private/__fdjac__.m: Accept typicalx as a parameter.
+
 2010-01-26  Jaroslav Hajek  <highegg@gmail.com>
 
 	* optimization/pqpnonneg.m: If Cholesky update failed, switch off
--- a/scripts/optimization/fminunc.m	Tue Jan 26 09:48:07 2010 +0100
+++ b/scripts/optimization/fminunc.m	Tue Jan 26 11:55:18 2010 +0100
@@ -32,7 +32,8 @@
 ## Currently, @code{fminunc} recognizes these options:
 ## @code{"FunValCheck"}, @code{"OutputFcn"}, @code{"TolX"},
 ## @code{"TolFun"}, @code{"MaxIter"}, @code{"MaxFunEvals"}, 
-## @code{"GradObj"}, @code{"FinDiffType"}.
+## @code{"GradObj"}, @code{"FinDiffType"},
+## @code{"TypicalX"}, @code{"AutoScaling"}.
 ##
 ## If @code{"GradObj"} is @code{"on"}, it specifies that @var{fcn},
 ## called with 2 output arguments, also returns the Jacobian matrix
@@ -76,9 +77,10 @@
   ## Get default options if requested.
   if (nargin == 1 && ischar (fcn) && strcmp (fcn, 'defaults'))
     x = optimset ("MaxIter", 400, "MaxFunEvals", Inf, \
-    "GradObj", "off", "TolX", 1.5e-8, "TolFun", 1.5e-8,
+    "GradObj", "off", "TolX", 1e-7, "TolFun", 1e-7,
     "OutputFcn", [], "FunValCheck", "off",
-    "FinDiffType", "central");
+    "FinDiffType", "central",
+    "TypicalX", [], "AutoScaling", "off");
     return;
   endif
 
@@ -99,6 +101,17 @@
   maxfev = optimget (options, "MaxFunEvals", Inf);
   outfcn = optimget (options, "OutputFcn");
 
+  ## Get scaling matrix using the TypicalX option. If set to "auto", the
+  ## scaling matrix is estimated using the jacobian.
+  typicalx = optimget (options, "TypicalX");
+  if (isempty (typicalx))
+    typicalx = ones (n, 1);
+  endif
+  autoscale = strcmpi (optimget (options, "AutoScaling", "off"), "on");
+  if (! autoscale)
+    dg = 1 ./ typicalx;
+  endif
+
   funvalchk = strcmpi (optimget (options, "FunValCheck", "off"), "on");
 
   if (funvalchk)
@@ -111,8 +124,8 @@
 
   macheps = eps (class (x0));
 
-  tolx = optimget (options, "TolX", sqrt (macheps));
-  tolf = optimget (options, "TolFun", sqrt (macheps));
+  tolx = optimget (options, "TolX", 1e-7);
+  tolf = optimget (options, "TolFun", 1e-7);
 
   factor = 0.1;
   ## FIXME: TypicalX corresponds to user scaling (???)
@@ -157,7 +170,7 @@
       grad = grad(:);
       nfev ++;
     else
-      grad = __fdjac__ (fcn, reshape (x, xsiz), fval, cdif)(:);
+      grad = __fdjac__ (fcn, reshape (x, xsiz), fval, typicalx, cdif)(:);
       nfev += (1 + cdif) * length (x);
     endif
 
@@ -179,18 +192,23 @@
       endif
     endif
 
-    ## Second derivatives approximate the hessian.
-    d2f = norm (hesr, 'columns').';
+    if (autoscale)
+      ## Second derivatives approximate the hessian.
+      d2f = norm (hesr, 'columns').';
+      if (niter == 1)
+        dg = d2f;
+      else
+        ## FIXME: maybe fixed lower and upper bounds?
+        dg = max (0.1*dg, d2f);
+      endif
+    endif
+
     if (niter == 1)
-      dg = d2f;
       xn = norm (dg .* x);
       ## FIXME: something better?
       delta = factor * max (xn, 1);
     endif
 
-    ## FIXME: maybe fixed lower and upper bounds?
-    dg = max (0.1*dg, d2f);
-
     ## FIXME -- why tolf*n*xn? If abs (e) ~ abs(x) * eps is a vector
     ## of perturbations of x, then norm (hesr*e) <= eps*xn, i.e. by
     ## tolf ~ eps we demand as much accuracy as we can expect.
--- a/scripts/optimization/fsolve.m	Tue Jan 26 09:48:07 2010 +0100
+++ b/scripts/optimization/fsolve.m	Tue Jan 26 11:55:18 2010 +0100
@@ -33,7 +33,8 @@
 ## Currently, @code{fsolve} recognizes these options:
 ## @code{"FunValCheck"}, @code{"OutputFcn"}, @code{"TolX"},
 ## @code{"TolFun"}, @code{"MaxIter"}, @code{"MaxFunEvals"}, 
-## @code{"Jacobian"}, @code{"Updating"} and @code{"ComplexEqn"}.
+## @code{"Jacobian"}, @code{"Updating"}, @code{"ComplexEqn"}
+## @code{"TypicalX"}, @code{"AutoScaling"} and @code{"FinDiffType"}.
 ##
 ## If @code{"Jacobian"} is @code{"on"}, it specifies that @var{fcn},
 ## called with 2 output arguments, also returns the Jacobian matrix
@@ -41,6 +42,12 @@
 ## the termination tolerance in the unknown variables, while 
 ## @code{"TolFun"} is a tolerance for equations.  Default is @code{1e-7}
 ## for both @code{"TolX"} and @code{"TolFun"}.
+##
+## If @code{"AutoScaling"} is on, the variables will be automatically scaled
+## according to the column norms of the (estimated) Jacobian. As a result,
+## TolF becomes scaling-independent. By default, this option is off, because
+## it may sometimes deliver unexpected (though mathematically correct) results.
+## 
 ## If @code{"Updating"} is "on", the function will attempt to use Broyden
 ## updates to update the Jacobian, in order to reduce the amount of jacobian
 ## calculations.
@@ -124,9 +131,10 @@
   ## Get default options if requested.
   if (nargin == 1 && ischar (fcn) && strcmp (fcn, 'defaults'))
     x = optimset ("MaxIter", 400, "MaxFunEvals", Inf, \
-    "Jacobian", "off", "TolX", 1.5e-8, "TolFun", 1.5e-8,
+    "Jacobian", "off", "TolX", 1e-7, "TolFun", 1e-7,
     "OutputFcn", [], "Updating", "on", "FunValCheck", "off",
-    "ComplexEqn", "off", "FinDiffType", "central");
+    "ComplexEqn", "off", "FinDiffType", "central",
+    "TypicalX", [], "AutoScaling", "off");
     return;
   endif
 
@@ -151,6 +159,17 @@
   updating = strcmpi (optimget (options, "Updating", "on"), "on");
   complexeqn = strcmpi (optimget (options, "ComplexEqn", "off"), "on");
 
+  ## Get scaling matrix using the TypicalX option. If set to "auto", the
+  ## scaling matrix is estimated using the jacobian.
+  typicalx = optimget (options, "TypicalX");
+  if (isempty (typicalx))
+    typicalx = ones (n, 1);
+  endif
+  autoscale = strcmpi (optimget (options, "AutoScaling", "off"), "on");
+  if (! autoscale)
+    dg = 1 ./ typicalx;
+  endif
+
   funvalchk = strcmpi (optimget (options, "FunValCheck", "off"), "on");
 
   if (funvalchk)
@@ -163,8 +182,8 @@
 
   macheps = eps (class (x0));
 
-  tolx = optimget (options, "TolX", sqrt (macheps));
-  tolf = optimget (options, "TolFun", sqrt (macheps));
+  tolx = optimget (options, "TolX", 1e-7);
+  tolf = optimget (options, "TolFun", 1e-7);
 
   factor = 1;
 
@@ -211,7 +230,7 @@
       fvec = fvec(:);
       nfev ++;
     else
-      fjac = __fdjac__ (fcn, reshape (x, xsiz), fvec, cdif);
+      fjac = __fdjac__ (fcn, reshape (x, xsiz), fvec, typicalx, cdif);
       nfev += (1 + cdif) * length (x);
     endif
 
@@ -229,26 +248,31 @@
       [q, r] = qr (fjac, 0);
     endif
 
-    ## Get column norms, use them as scaling factors.
-    jcn = norm (fjac, 'columns').';
+    if (autoscale)
+      ## Get column norms, use them as scaling factors.
+      jcn = norm (fjac, 'columns').';
+      if (niter == 1)
+        dg = jcn;
+        dg(dg == 0) = 1;
+      else
+        ## Rescale adaptively.
+        ## FIXME: the original minpack used the following rescaling strategy:
+        ##   dg = max (dg, jcn);
+        ## but it seems not good if we start with a bad guess yielding jacobian
+        ## columns with large norms that later decrease, because the corresponding
+        ## variable will still be overscaled. So instead, we only give the old
+        ## scaling a small momentum, but do not honor it.
+
+        dg = max (0.1*dg, jcn);
+      endif
+    endif
+
     if (niter == 1)
-      dg = jcn;
-      dg(dg == 0) = 1;
       xn = norm (dg .* x);
       ## FIXME: something better?
       delta = factor * max (xn, 1);
     endif
 
-    ## Rescale adaptively.
-    ## FIXME: the original minpack used the following rescaling strategy:
-    ##   dg = max (dg, jcn);
-    ## but it seems not good if we start with a bad guess yielding jacobian
-    ## columns with large norms that later decrease, because the corresponding
-    ## variable will still be overscaled. So instead, we only give the old
-    ## scaling a small momentum, but do not honor it.
-
-    dg = max (0.1*dg, jcn);
-
     ## It also seems that in the case of fast (and inhomogeneously) changing
     ## jacobian, the Broyden updates are of little use, so maybe we could
     ## skip them if a big disproportional change is expected. The question is,
--- a/scripts/optimization/private/__fdjac__.m	Tue Jan 26 09:48:07 2010 +0100
+++ b/scripts/optimization/private/__fdjac__.m	Tue Jan 26 11:55:18 2010 +0100
@@ -21,10 +21,10 @@
 ## Undocumented internal function.
 ## @end deftypefn
 
-function fjac = __fdjac__ (fcn, x, fvec, cdif, err = 0)
+function fjac = __fdjac__ (fcn, x, fvec, typicalx, cdif, err = 0)
   if (cdif)
     err = (max (eps, err)) ^ (1/3);
-    h = max (abs (x), 1)*err; # FIXME?
+    h = typicalx*err;
     fjac = zeros (length (fvec), numel (x));
     for i = 1:numel (x)
       x1 = x2 = x;
@@ -34,7 +34,7 @@
     endfor
   else
     err = sqrt (max (eps, err));
-    h = max (abs (x), 1)*err; # FIXME?
+    h = typicalx*err;
     fjac = zeros (length (fvec), numel (x));
     for i = 1:numel (x)
       x1 = x;