changeset 1705:2ffc6e4e8102 octave-forge

Integrate bfgsmin and lbfgsmin into the same file so that convergence, etc., is the same. Complete argument checking (Go ahead - hit me!). Beautify output.
author mcreel
date Wed, 08 Sep 2004 14:10:43 +0000
parents e5b992746eba
children 20639f09e78e
files main/optim/bfgsmin.cc main/optim/bfgsmin_example.m
diffstat 2 files changed, 400 insertions(+), 184 deletions(-)
--- a/main/optim/bfgsmin.cc	Wed Sep 08 14:07:22 2004 +0000
+++ b/main/optim/bfgsmin.cc	Wed Sep 08 14:10:43 2004 +0000
@@ -16,7 +16,7 @@
 
 // bfgsmin
 // 
-// bfgs minimization of function of form 
+// bfgs or limited memory bfgs minimization of a function of the form
 //
 // [value, return_2,..., return_m] = f(arg_1, arg_2,..., arg_n)
 // 
@@ -27,62 +27,170 @@
 #include <octave/Cell.h>
 #include <octave/lo-mappers.h>
 #include <float.h>
+#include "error.h"
 
 // define argument checks
 static bool
 any_bad_argument(const octave_value_list& args)
 {
+
+	int i;
+	
   if (!args(0).is_string())
-    {
-      error("bfgsmin: first argument must be string holding objective function name");
-      return true;
-    }
+  {
+    error("bfgsmin: first argument must be string holding objective function name");
+    return true;
+  }
+
   if (!args(1).is_cell())
-    {
-      error("bfgsmin: second argument must cell array of function arguments");
-      return true;
-    }
-  if (args.length() == 3)
+  {
+    error("bfgsmin: second argument must cell array of function arguments");
+    return true;
+  }
+
+  
+	// controls should be a 4- or 5-element cell array of integers
+	if (args.length() > 2)
+  {
+		Cell control (args(2).cell_value()); // fetch the controls so we can examine them
+		if (error_state)
+		{
+			error("bfgsmin: 3rd argument must be a cell array of 4 or 5 integers");
+			return true;
+		}	
+		
+		// there should be 4 or 5 elements, depending on whether bfgs or lbfgs is used
+		if (((control.length() < 4) || (control.length() > 5)))
     {
-      if (!args(2).is_cell() || (!(args(2).length() == 4)))
-    	{
-	  error("bfgsmin: 3rd argument, if supplied,  must a 4-element cell array of control options");
-	  return true;
-    	}
+			error("bfgsmin: 3rd argument must be a cell array of 4 or 5 integers");
+			return true;
     }
-  if (args.length() == 4)
+		
+		// special treatment for 1st element - it can be Inf or an integer
+		if (!xisinf (control(0).double_value()))
+		{
+			int tmp = control(0).int_value();
+			if (error_state)
+			{
+				error("bfgsmin: 3rd argument must be a cell array of 4 or 5 integers");
+				return true;
+			}	
+		}	
+
+		// now the other 3 or 4 elements	
+		for (i=1; i < control.length(); i++)
+		{
+			int tmp = control(i).int_value();
+			if (error_state)
+			{
+				error("bfgsmin: 3rd argument must be a cell array of 4 or 5 integers");
+				return true;
+			}	
+		}
+  }
+
+  
+	// tolerances should be a 3-element cell array of doubles
+	if (args.length() > 3)
+  {
+    // type is cell
+		Cell tols (args(3).cell_value());
+		if (error_state)
+		{
+			error("bfgsmin: 4th argument must be a cell array of 3 positive doubles");
+			return true;
+		}	
+
+		// there should be 3 elements
+		if (!(tols.length()==3))
     {
-      if (!args(3).is_cell() || (!(args(3).length() == 3)))
-    	{
-	  error("bfgsmin: 4th argument, if supplied, must a 3-element cell array of tolerances");
-	  return true;
-    	}
-    }	
+			error("bfgsmin: 4th argument, if supplied, must a 3-element cell array of tolerances");
+			return true;
+    }
+		
+		for (i=0; i<3; i++)
+		{
+			double tmp = tols(i).double_value();
+			if (error_state)
+			{
+				error("bfgsmin: 4th argument must be a cell array of 3 positive doubles");
+				return true;
+			}	
+
+			if(tmp < 0)
+			{	
+				error("bfgsmin: 4th argument must be a cell array of 3 positive doubles");
+				return true;
+			}	
+		}
+  }	
 
   return false;
 }
 
+// this is the lbfgs direction, used if control has 5 elements
+ColumnVector lbfgs_recursion(int memory, Matrix& sigmas, Matrix& gammas, ColumnVector d)
+{
+  if (memory == 0)
+  {
+    const int n = sigmas.columns();
+    ColumnVector sig = sigmas.column(n-1);
+    ColumnVector gam = gammas.column(n-1);
+    // do conditioning if there is any memory
+    double cond = gam.transpose()*gam;
+    if (cond > 0)
+		{
+	  	cond = (sig.transpose()*gam) / cond;
+	  	d = cond*d;
+		}
+    return d;
+  }	
+  else
+  {
+    const int k = d.rows();
+    const int n = sigmas.columns();
+    int i, j;
+    ColumnVector sig = sigmas.column(memory-1);
+    ColumnVector gam = gammas.column(memory-1);
+    double rho;
+    rho = 1.0 / (gam.transpose() * sig);
+    double alpha;
+    alpha = rho * (sig.transpose() * d);
+    d = d - alpha*gam;
+    d = lbfgs_recursion(memory - 1, sigmas, gammas, d);
+    d = d + (alpha - rho * gam.transpose() * d) * sig;
+  }
+  return d;
+}			
+
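For reference, the recursion above is the recursive form of the limited-memory inverse-Hessian update: with s_i a stored parameter change (a column of sigmas), y_i the matching gradient change (a column of gammas), and rho_i = 1/(y_i^T s_i), it implicitly applies

    H_i = (I - \rho_i s_i y_i^\top) H_{i-1} (I - \rho_i y_i s_i^\top) + \rho_i s_i s_i^\top,
    \qquad H_0 = \frac{s^\top y}{y^\top y}\, I

where H_0 is the scaled identity built in the memory == 0 base case (left unscaled while the stored columns are still zero), and it returns H d for the input vector d. The caller negates the result to obtain the search direction.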
 
 DEFUN_DLD(bfgsmin, args, ,
-	  "bfgsmin: bfgs minimization of a function. See bfgsmin_example.m\n\
+"bfgsmin: bfgs minimization of a function with respect to a column vector.\n\
 \n\
-[x, obj, convergence] = bfgsmin(\"f\", {args}, {control}, {tols})\n\
+By default, numeric derivatives are used, but see bfgsmin_example.m\n\
+for how to use analytic derivatives.\n\
+\n\
+Usage: [x, obj, convergence] = bfgsmin(\"f\", args, control, tols)\n\
 \n\
 Arguments:\n\
 * \"f\": function name (string)\n\
-* {args}: a cell array that holds all arguments of the function,\n\
-* {control}: an optional cell array of 4 elements\n\
+* args: a cell array that holds all arguments of the function\n\
+	The argument with respect to which minimization is done\n\
+	MUST be a COLUMN vector\n\
+* control: an optional cell array of 4 or 5 elements\n\
 	* elem 1: maximum iterations  (-1 = infinity (default))\n\
 	* elem 2: verbosity\n\
-		- 0 = no screen output (default)\n\
-		- 1 = summary every iteration\n\
-		- 2 = only last iteration summary\n\
+		0 = no screen output (default)\n\
+		1 = summary every iteration\n\
+		2 = only final results\n\
 	* elem 3: convergence criterion\n\
-		 - 1 = strict (function, gradient and param change) (default)\n\
-		 - 2 = weak - only function convergence required\n\
+		1 = strict (function, gradient and param change) (default)\n\
+		2 = weak - only function convergence required\n\
 	* elem 4: arg with respect to which minimization is done\n\
 		(default = first)\n\
-* {tols}: an optional cell array of 3 elements\n\
+	* elem 5: (optional) memory limit for lbfgs. If it's a positive integer\n\
+		then lbfgs will be used. Otherwise ordinary bfgs is used\n\
+* tols: an optional cell array of 3 elements\n\
 	* elem 1: function change tolerance, default 1e-12\n\
 	* elem 2: parameter change tolerance, default 1e-6\n\
 	* elem 3: gradient tolerance, default 1e-4\n\
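A minimal call in the style of bfgsmin_example.m below, sketching both control formats documented above (objective1, x0 and y0 are defined in that example file):

control = {10,2,1,1};	# 4 elements: maxiters, verbosity, criterion, arg_to_min -> ordinary bfgs
[theta, obj_value, convergence] = bfgsmin("objective1", {x0, y0}, control);
control = {10,2,1,1,3};	# 5th element: lbfgs with a memory of 3 iterations
[theta, obj_value, convergence] = bfgsmin("objective1", {x0, y0}, control);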
@@ -118,10 +226,10 @@
 {
   int nargin = args.length();
   if ((nargin < 2) || (nargin > 4))
-    {
-      error("bfgsmin: you must supply 2, 3 or 4 arguments");
-      return octave_value_list();
-    }
+  {
+    error("bfgsmin: you must supply 2, 3 or 4 arguments");
+    return octave_value_list();
+  }
 
   // check the arguments
   if (any_bad_argument(args)) return octave_value_list();
@@ -138,68 +246,92 @@
 
   int max_iters, verbosity, criterion, minarg;
   int convergence, iter;
-  int gradient_failed, i, j, k, conv_fun, conv_param, conv_grad;
+  int memory, gradient_failed, i, j, k, conv_fun, conv_param, conv_grad;
   int have_gradient = 0;
   double func_tol, param_tol, gradient_tol, stepsize, obj_value;
   double obj_in, last_obj_value, denominator, test;
   Matrix H, tempmatrix, H1, H2;
-  ColumnVector thetain, d, g, g_new, p, q;
+  ColumnVector thetain, d, g, g_new, p, q, sig, gam;
+
+
+  // Default values for controls
+  max_iters = INT_MAX; // no limit on iterations
+  verbosity = 0; // by default don't report results to screen
+  criterion = 1; // strong convergence required
+  minarg = 1; // by default, first arg is one over which we minimize
+  memory = 0; // default: plain bfgs (no limited memory)
 
   // use provided controls, if applicable
-  if (args.length() >= 3)
-    {
-      Cell control (args(2).cell_value());
-      //      if (xisinf (control(0).double_value()))
-      //	max_iters = -1;
-      //else 
-	max_iters = control(0).int_value();
-      if (max_iters == -1) max_iters = INT_MAX;
-      verbosity = control(1).int_value();
-      criterion = control(2).int_value();
-      minarg = control(3).int_value();
-    }	
-  else
-    {
-      // Default values for controls
-      max_iters = INT_MAX; // no limit on iterations
-      verbosity = 0; // by default don't report results to screen
-      criterion = 1; // strong convergence required
-      minarg = 1; // by default, first arg is one over which we minimize
-    }
-	
+  if (args.length() > 2)
+  {
+    Cell control (args(2).cell_value());
+    if (xisinf (control(0).double_value()))  // this is to allow 'Inf' as first element of control
+      max_iters = -1;
+    else 
+			max_iters = control(0).int_value();
+    if (max_iters == -1) max_iters = INT_MAX;
+    verbosity = control(1).int_value();
+    criterion = control(2).int_value();
+    minarg = control(3).int_value();
+		if (control.length() > 4) memory = control(4).int_value();
+  }	
+ 
+
+	// type checking for minimization parameter done here, since we don't know minarg
+	// until now	
+	if (!f_args(minarg - 1).is_real_matrix())
+	{
+		error("bfgsmin: minimization must be with respect to a column vector");
+		return octave_value_list();
+	}
+	if (f_args(minarg - 1).columns() != 1)
+	{
+		error("bfgsmin: minimization must be with respect to a column vector");
+		return octave_value_list();
+	}	
+
 	
   // use provided tolerances, if applicable
   if (args.length() == 4)
-    {
-      Cell tols (args(3).cell_value());
-      func_tol = tols(0).double_value();
-      param_tol = tols(1).double_value();
-      gradient_tol = tols(2).double_value();
-    }	
+	{
+    Cell tols (args(3).cell_value());
+    func_tol = tols(0).double_value();
+    param_tol = tols(1).double_value();
+    gradient_tol = tols(2).double_value();
+  }	
   else
-    {
-      // Default values for tolerances
-      func_tol  = 1e-12;
-      param_tol = 1e-6;
-      gradient_tol = 1e-4;  
-
-    }
+  {
+    // Default values for tolerances
+    func_tol  = 1e-12;
+    param_tol = 1e-6;
+    gradient_tol = 1e-4;  
+  }
 	
-  step_args(0) = f; 
+  
+	// arguments for stepsize function
+	step_args(0) = f; 
   step_args(1) = f_args;
   step_args(3) = minarg;
 
+	// arguments for numgradient
   g_args(0) = f;
   g_args(1) = f_args;
   g_args(2) = minarg;
 	
+	// arguments for celleval (to calculate objective function value)
   c_args(0) = f;
   c_args(1) = f_args;
 
-  ColumnVector theta  = f_args(minarg - 1).column_vector_value();
-
+  
+	// get the minimization argument
+	ColumnVector theta  = f_args(minarg - 1).column_vector_value();
   k = theta.rows();
 	
+		// containers for items in limited memory version
+  Matrix sigmas(k,memory);
+ 	Matrix gammas(k,memory);
+		
+		
   // initialize things
   convergence = -1; // if this doesn't change, it means that maxiters were exceeded
   thetain = theta;
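Because control and tols are both optional, a two-argument call relies entirely on the defaults just set; a sketch, with objective1, x0 and y0 as in bfgsmin_example.m:

# equivalent to control = {-1,0,1,1} and tols = {1e-12,1e-6,1e-4}
[theta, obj_value, convergence] = bfgsmin("objective1", {x0, y0});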
@@ -253,9 +385,15 @@
       conv_param =  -1;
       conv_grad = -1;
 
-      d = -H*g;
+			if(memory > 0) // lbfgs
+			{
+				if (iter < memory) d = lbfgs_recursion(iter, sigmas, gammas, g);
+	      else d = lbfgs_recursion(memory, sigmas, gammas, g);
+	      d = -d;
+			}
+			else d = -H*g; // ordinary bfgs
 
-      // stepsize: try bfgs direction, then steepest descent if it fails
+      // stepsize: try (l)bfgs direction, then steepest descent if it fails
       step_args(2) = d;
       f_args(minarg - 1) = theta;
       step_args(1) = f_args;
@@ -263,72 +401,79 @@
       stepsize = f_return(0).double_value();
       obj_value = f_return(1).double_value();
       if (stepsize == 0.0) // fall back to steepest descent
-	{
-	  d = -g; // try steepest descent
-	  step_args(2) = d;
-	  f_return = feval("newtonstep", step_args);
-	  stepsize = f_return(0).double_value();
-	  obj_value = f_return(1).double_value();
-	}
+			{
+	  		d = -g; // try steepest descent
+	  		step_args(2) = d;
+	  		f_return = feval("newtonstep", step_args);
+	  		stepsize = f_return(0).double_value();
+	  		obj_value = f_return(1).double_value();
+			}
 
       p = stepsize*d;
 
       // check normal convergence: all 3 must be satisfied
       // function convergence
       if (fabs(last_obj_value) > 1.0)
-	{
-	  conv_fun = (fabs(obj_value - last_obj_value)/fabs(last_obj_value)) < func_tol;
-	}
+			{
+	  		conv_fun = (fabs(obj_value - last_obj_value)/fabs(last_obj_value)) < func_tol;
+			}
       else
-	{
-	  conv_fun = fabs(obj_value - last_obj_value) < func_tol;
-	}	
+			{
+	  		conv_fun = fabs(obj_value - last_obj_value) < func_tol;
+			}	
       // parameter change convergence
       test = sqrt(theta.transpose() * theta);
       if (test > 1)
-	{
-	  conv_param = sqrt(p.transpose() * p) / test < param_tol ;
-			
-	}
+			{
+	  		conv_param = sqrt(p.transpose() * p) / test < param_tol ;
+
+			}
       else
-	{
-	  conv_param = sqrt(p.transpose() * p) < param_tol;
-	}		
+			{
+	  		conv_param = sqrt(p.transpose() * p) < param_tol;
+			}		
       // gradient convergence
       conv_grad = sqrt(g.transpose() * g) < gradient_tol;
 
 
       // Want intermediate results?
-      if (verbosity == 1)
-	{
-	  printf("\nBFGSMIN intermediate results: Iteration %d\n",iter);
-	  printf("Stepsize %8.7f\n", stepsize);
-	  if (have_gradient) printf("Using analytic gradient\n");
-	  else printf("Using numeric gradient\n");
-	  printf("Objective function value %16.10f\n", last_obj_value);
-	  printf("Function conv %d  Param conv %d  Gradient conv %d\n", conv_fun, conv_param, conv_grad);	
-	  printf("  params  gradient  change\n");
-	  for (j = 0; j<k; j++)
-	    {
-	      printf("%8.4f %8.4f %8.4f\n",theta(j),g(j),p(j));
-	    }		
-	  printf("\n");
-	}
+      if ((verbosity > 0) && (verbosity < 2))
+			{
+   			printf("\n======================================================\n");
+ 				printf("BFGSMIN intermediate results\n");
+				printf("\n");
+				if (memory > 0) printf("Using LBFGS, memory is last %d iterations\n",memory);
+    		if (have_gradient) printf("Using analytic gradient\n");
+    		else printf("Using numeric gradient\n");
+				printf("\n");
+				printf("------------------------------------------------------\n");
+    		printf("Function conv %d  Param conv %d  Gradient conv %d\n", conv_fun, conv_param, conv_grad);	
+				printf("------------------------------------------------------\n");
+ 				printf("Objective function value %g\n", last_obj_value);
+				printf("Stepsize %g\n", stepsize);
+				printf("%d iterations\n", iter);
+				printf("------------------------------------------------------\n");
+    		printf("\n param    gradient  change\n");
+    		for (j = 0; j<k; j++)
+				{
+					printf("%8.4f %8.4f %8.4f\n",theta(j),g(j),p(j));
+				}		
+			}
 	
       // Are we done?
       if (criterion == 1)
-	{
-	  if (conv_fun && conv_param && conv_grad)
-	    {
-	      convergence = 1;
-	      break;
-	    }
-	}		
+			{
+	  		if (conv_fun && conv_param && conv_grad)
+	    	{
+	      	convergence = 1;
+	      	break;
+	    	}
+			}		
       else if (conv_fun)
-	{
-	  convergence = 1;
-	  break;
-	}
+			{
+	  		convergence = 1;
+	  		break;
+			}
 		
 			
       last_obj_value = obj_value;	
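The three convergence tests above switch from an absolute to a relative comparison once the relevant magnitude passes 1; in Octave terms they amount to (a sketch, not code from this changeset):

conv_fun   = abs(obj_value - last_obj_value) / max(1, abs(last_obj_value)) < func_tol;
conv_param = sqrt(p'*p) / max(1, sqrt(theta'*theta)) < param_tol;
conv_grad  = sqrt(g'*g) < gradient_tol;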
@@ -337,71 +482,114 @@
       // new gradient
       f_args(minarg - 1) = theta;
       if (have_gradient)
-	{
-	  c_args(1) = f_args;
-	  f_return = feval("celleval", c_args);
-	  g_new = f_return(1).column_vector_value();
-	}
+			{
+	  		c_args(1) = f_args;
+	  		f_return = feval("celleval", c_args);
+	  		g_new = f_return(1).column_vector_value();
+			}
       else // use numeric gradient
-	{
-	  g_args(1) = f_args;
-	  f_return = feval("numgradient", g_args);
-	  tempmatrix = f_return(0).matrix_value();
-	  g_new = tempmatrix.row(0).transpose();
-	}
+			{
+	  		g_args(1) = f_args;
+	  		f_return = feval("numgradient", g_args);
+	  		tempmatrix = f_return(0).matrix_value();
+	  		g_new = tempmatrix.row(0).transpose();
+			}
 
       // Check that gradient is ok
       gradient_failed = 0;  // test = 1 means gradient failed
       for (i=0; i<k;i++)
-	{
-	  gradient_failed = gradient_failed + xisnan(g_new(i));
-	}
+			{
+	  		gradient_failed = gradient_failed + xisnan(g_new(i));
+			}
 
-      // Hessian update if gradient ok		
-      if (!gradient_failed)
-	{
-	  q = g_new-g;
-	  g = g_new;
-	  denominator = q.transpose()*p;
-	  if ((fabs(denominator) < DBL_EPSILON))  // reset Hessian if necessary
-	    {  	
-	      if (verbosity == 1) printf("bfgsmin: Hessian reset\n");
-	      H = identity_matrix(k,k);
-	    }	
-	  else 
-	    {
-	      H1 = (1.0+(q.transpose() * H * q) / denominator) / denominator \
-		* (p * p.transpose());
-	      H2 = (p * q.transpose() * H + H*q*p.transpose());
-	      H2 = H2 / denominator;
-	      H = H + H1 - H2;
-	    }
-	}
-      else H = identity_matrix(k,k); // reset hessian if gradient fails
-      // then try to start again with steepest descent
+			if (memory == 0) //use bfgs
+			{
+      	// Hessian update if gradient ok		
+      	if (!gradient_failed)
+				{
+	  			q = g_new-g;
+	  			g = g_new;
+	  			denominator = q.transpose()*p;
+	  			if ((fabs(denominator) < DBL_EPSILON))  // reset Hessian if necessary
+	    		{  	
+	      		if (verbosity == 1) printf("bfgsmin: Hessian reset\n");
+	      		H = identity_matrix(k,k);
+	    		}	
+	  			else 
+	    		{
+	      		H1 = (1.0+(q.transpose() * H * q) / denominator) / denominator \
+						* (p * p.transpose());
+	      		H2 = (p * q.transpose() * H + H*q*p.transpose());
+	      		H2 = H2 / denominator;
+	      		H = H + H1 - H2;
+	    		}
+				}
+      	else H = identity_matrix(k,k); // reset hessian if gradient fails
+      	// then try to start again with steepest descent
+			}
+			else // use lbfgs	
+			{
+				// save components for Hessian if gradient ok
+      	if (!gradient_failed)
+				{
+	  			sig = p;    // change in parameter
+	  			gam = g_new - g; 	// change in gradient
+	  			g = g_new;
+
+	  			// shift remembered vectors to the right (forget last)
+	  			for(j = memory - 1; j > 0; j--)
+	  			{
+	    			for(i = 0; i < k; i++)
+						{
+		  				sigmas(i,j) = sigmas(i,j-1);
+		  				gammas(i,j) = gammas(i,j-1);
+						}
+	  			}
+
+	  			// insert new vectors in left-most column
+	  			for(i = 0; i < k; i++)
+	  			{
+	    			sigmas(i, 0) = sig(i);
+	    			gammas(i, 0) = gam(i);
+	  			}	
+				}		
+			 	else // failed gradient - lose memory and use previous theta
+				{
+	  			sigmas.fill(0.0);
+	  			gammas.fill(0.0);
+	  			theta = theta - p;
+				}
+			}		
     }
 	
   // Want last iteration results?
-  if (verbosity == 2)
-    {
-      printf("\n================================================\n");
-      printf("BFGSMIN final results\n");
-      if (convergence == -1) printf("NO CONVERGENCE: maxiters exceeded\n");
-      if ((convergence == 1) & (criterion == 1)) printf("STRONG CONVERGENCE\n");
-      if ((convergence == 1) & !(criterion == 1)) printf("WEAK CONVERGENCE\n");
-      if (convergence == 2) printf("NO CONVERGENCE: algorithm failed\n");
-      if (have_gradient) printf("Used analytic gradient\n");
-      else printf("Used numeric gradient\n");
-      printf("\nObj. fn. value %f     Iteration %d\n", obj_value, iter);
-      printf("Function conv %d  Param conv %d  Gradient conv %d\n", conv_fun, conv_param, conv_grad);	
-      printf("\n  param  gradient  change\n");
-      for (j = 0; j<k; j++)
-	{
-	  printf("%8.4f %8.4f %8.4f\n",theta(j),g(j),p(j));
-	}		
-      printf("================================================\n");
+  if (verbosity > 0)
+  {
 
-    }
+    printf("\n======================================================\n");
+ 		printf("BFGSMIN final results\n");
+		printf("\n");
+		if (memory > 0) printf("Used LBFGS, memory is last %d iterations\n",memory);
+    if (have_gradient) printf("Used analytic gradient\n");
+    else printf("Used numeric gradient\n");
+		printf("\n");
+		printf("------------------------------------------------------\n");
+ 		if (convergence == -1)                      printf("NO CONVERGENCE: max iters exceeded\n");
+    if ((convergence == 1) & (criterion == 1))  printf("STRONG CONVERGENCE\n");
+    if ((convergence == 1) & !(criterion == 1)) printf("WEAK CONVERGENCE\n");
+    if (convergence == 2)                       printf("NO CONVERGENCE: algorithm failed\n");
+    printf("Function conv %d  Param conv %d  Gradient conv %d\n", conv_fun, conv_param, conv_grad);	
+		printf("------------------------------------------------------\n");
+ 		printf("Objective function value %g\n", last_obj_value);
+		printf("Stepsize %g\n", stepsize);
+		printf("%d iterations\n", iter);
+		printf("------------------------------------------------------\n");
+    printf("\n param    gradient  change\n");
+    for (j = 0; j<k; j++)
+		{
+			printf("%8.4f %8.4f %8.4f\n",theta(j),g(j),p(j));
+		}		
+  }
   f_return(0) = theta;
   f_return(1) = obj_value;
   f_return(2) = convergence;
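For reference, H1 and H2 in the Hessian-update hunk above are the two correction terms of the standard inverse BFGS update, with p the parameter step, q = g_new - g the gradient change, and denominator q^T p:

    H \leftarrow H + \left(1 + \frac{q^\top H q}{q^\top p}\right)\frac{p\,p^\top}{q^\top p} - \frac{p\,q^\top H + H\,q\,p^\top}{q^\top p}

When q^T p is numerically zero this update is ill-conditioned, so the code skips it and resets H to the identity instead.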
--- a/main/optim/bfgsmin_example.m	Wed Sep 08 14:07:22 2004 +0000
+++ b/main/optim/bfgsmin_example.m	Wed Sep 08 14:10:43 2004 +0000
@@ -16,14 +16,23 @@
 
 1;
 # This shows how to call bfgsmin.m
+
+# You can minimize w.r.t. any argument, but the argument that minimization is done over
+# must be a column vector.
+
+# It is possible to supply analytic derivatives. If the second return of the objective
+# function is a column vector the same size as the minimization argument, it is assumed
+# to be the gradient vector.
+
 page_screen_output = 0;
 
 # example obj. fn
 function obj_value = objective1(x, y)
-	obj_value = log(1 + x'*x) + log(1 + y'*y);
+	z = y - ones(size(y));
+	obj_value = log(1 + x'*x) + log(1 + z'*z);
 endfunction 
 
-# example obj. fn.: with gradient
+# example obj. fn.: with gradient w.r.t. x
 function [obj_value, gradient] = objective2(x, y)
 	obj_value = (1 + x'*x) + (1 + y'*y);
 	gradient = 2*x; #(1/(1 + x'*x))*2*x;
@@ -31,20 +40,20 @@
 
 # Check bfgsmin, illustrating variations
 
-printf("EXAMPLE 1: Numeric gradient\n");
+printf("\nEXAMPLE 1: Numeric gradient\n");
 x0 = ones(2,1);
 y0 = 2*ones(2,1);
 control = {10,2,1,1};  # maxiters, verbosity, conv. reg., arg_to_min
 [theta, obj_value, convergence] = bfgsmin("objective1", {x0, y0}, control);
 printf("\n");
 
-printf("EXAMPLE 2: Analytic gradient\n");
+printf("\nEXAMPLE 2: Analytic gradient\n");
 control = {50,2,1,1};  # maxiters, verbosity, conv. reg., arg_to_min
 [theta, obj_value, convergence] = bfgsmin("objective2", {x0, y0}, control);
 printf("\n");
 
 
-printf("EXAMPLE 3: Numeric gradient, non-default tolerances\n");
+printf("\nEXAMPLE 3: Numeric gradient, non-default tolerances\n");
 printf("Note how the gradient tolerance can't be achieved with\n");
 printf("numeric differentiation\n");
 control = {100,2,1,1};  # maxiters, verbosity, conv. reg., arg_to_min
@@ -52,8 +61,27 @@
 [theta, obj_value, convergence] = bfgsmin("objective1", {x0, y0}, control, tols);
 printf("\n");
 
-printf("EXAMPLE 4: Funny case - min wrt 2nd arg.");
+printf("\nEXAMPLE 4: Funny case - min wrt 2nd arg.\n");
 control = {50,2,1,2};  # maxiters, verbosity, conv. reg., arg_to_min
 [theta, obj_value, convergence] = bfgsmin("objective1", {x0, y0}, control);
 
 
+printf("\nEXAMPLE 5: Limited memory BFGS, Numeric gradient\n");
+x0 = ones(2,1);
+y0 = 2*ones(2,1);
+control = {10,2,1,1,3};  # maxiters, verbosity, conv. reg., arg_to_min, lbfgs memory
+[theta, obj_value, convergence] = bfgsmin("objective1", {x0, y0}, control);
+printf("\n");
+
+
+printf("\nEXAMPLE 6: Limited memory BFGS, Analytic gradient\n");
+control = {50,2,1,1,3};  # maxiters, verbosity, conv. reg., arg_to_min, lbfgs memory
+[theta, obj_value, convergence] = bfgsmin("objective2", {x0, y0}, control);
+printf("\n");
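A hypothetical further example (not part of this changeset) combining two documented features: an analytic gradient returned as the second output, taken with respect to the second argument, which is selected for minimization via the 4th control element. objective3 is an invented name for illustration.

# hypothetical: analytic gradient w.r.t. the 2nd argument
function [obj_value, gradient] = objective3(x, y)
	obj_value = log(1 + x'*x) + log(1 + y'*y);
	gradient = (1/(1 + y'*y))*2*y;	# gradient w.r.t. y, the minimized argument
endfunction
control = {50,2,1,2};	# minimize w.r.t. 2nd arg
[theta, obj_value, convergence] = bfgsmin("objective3", {x0, y0}, control);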