Mercurial > octave-nkf
comparison scripts/optimization/sqp.m @ 10678:35338deff753
Guarantee equivalent results if sqp called with or wihout bounds
(bug #29989). Simplify input option handling and add %tests
to check validation code. Rewrite documentation string.
author | Rik <octave@nomad.inbox5.com> |
---|---|
date | Wed, 02 Jun 2010 11:56:26 -0700 |
parents | 95c3e38098bf |
children | 693e22af08ae |
comparison
equal
deleted
inserted
replaced
10677:21defab4207c | 10678:35338deff753 |
---|---|
15 ## You should have received a copy of the GNU General Public License | 15 ## You should have received a copy of the GNU General Public License |
16 ## along with Octave; see the file COPYING. If not, see | 16 ## along with Octave; see the file COPYING. If not, see |
17 ## <http://www.gnu.org/licenses/>. | 17 ## <http://www.gnu.org/licenses/>. |
18 | 18 |
19 ## -*- texinfo -*- | 19 ## -*- texinfo -*- |
20 ## @deftypefn {Function File} {[@var{x}, @var{obj}, @var{info}, @var{iter}, @var{nf}, @var{lambda}] =} sqp (@var{x}, @var{phi}, @var{g}, @var{h}, @var{lb}, @var{ub}, @var{maxiter}, @var{tolerance}) | 20 ## @deftypefn {Function File} {[@var{x}, @var{obj}, @var{info}, @var{iter}, @var{nf}, @var{lambda}] =} sqp (@var{x}, @var{phi}) |
21 ## @deftypefnx {Function File} {[@dots{}] =} sqp (@var{x}, @var{phi}, @var{g}) | |
22 ## @deftypefnx {Function File} {[@dots{}] =} sqp (@var{x}, @var{phi}, @var{g}, @var{h}) | |
23 ## @deftypefnx {Function File} {[@dots{}] =} sqp (@var{x}, @var{phi}, @var{g}, @var{h}, @var{lb}, @var{ub}) | |
24 ## @deftypefnx {Function File} {[@dots{}] =} sqp (@var{x}, @var{phi}, @var{g}, @var{h}, @var{lb}, @var{ub}, @var{maxiter}) | |
25 ## @deftypefnx {Function File} {[@dots{}] =} sqp (@var{x}, @var{phi}, @var{g}, @var{h}, @var{lb}, @var{ub}, @var{maxiter}, @var{tolerance}) | |
21 ## Solve the nonlinear program | 26 ## Solve the nonlinear program |
22 ## @tex | 27 ## @tex |
23 ## $$ | 28 ## $$ |
24 ## \min_x \phi (x) | 29 ## \min_x \phi (x) |
25 ## $$ | 30 ## $$ |
58 ## | 63 ## |
59 ## The second argument is a function handle pointing to the objective | 64 ## The second argument is a function handle pointing to the objective |
60 ## function. The objective function must be of the form | 65 ## function. The objective function must be of the form |
61 ## | 66 ## |
62 ## @example | 67 ## @example |
63 ## y = phi (x) | 68 ## @var{y} = phi (@var{x}) |
64 ## @end example | 69 ## @end example |
65 ## | 70 ## |
66 ## @noindent | 71 ## @noindent |
67 ## in which @var{x} is a vector and @var{y} is a scalar. | 72 ## in which @var{x} is a vector and @var{y} is a scalar. |
68 ## | 73 ## |
69 ## The second argument may also be a 2- or 3-element cell array of | 74 ## The second argument may also be a 2- or 3-element cell array of |
70 ## function handles. The first element should point to the objective | 75 ## function handles. The first element should point to the objective |
71 ## function, the second should point to a function that computes the | 76 ## function, the second should point to a function that computes the |
72 ## gradient of the objective function, and the third should point to a | 77 ## gradient of the objective function, and the third should point to a |
73 ## function to compute the hessian of the objective function. If the | 78 ## function that computes the Hessian of the objective function. If the |
74 ## gradient function is not supplied, the gradient is computed by finite | 79 ## gradient function is not supplied, the gradient is computed by finite |
75 ## differences. If the hessian function is not supplied, a BFGS update | 80 ## differences. If the Hessian function is not supplied, a BFGS update |
76 ## formula is used to approximate the hessian. | 81 ## formula is used to approximate the Hessian. |
77 ## | 82 ## |
78 ## If supplied, the gradient function must be of the form | 83 ## When supplied, the gradient function must be of the form |
79 ## | 84 ## |
80 ## @example | 85 ## @example |
81 ## g = gradient (x) | 86 ## @var{g} = gradient (@var{x}) |
82 ## @end example | 87 ## @end example |
83 ## | 88 ## |
84 ## @noindent | 89 ## @noindent |
85 ## in which @var{x} is a vector and @var{g} is a vector. | 90 ## in which @var{x} is a vector and @var{g} is a vector. |
86 ## | 91 ## |
87 ## If supplied, the hessian function must be of the form | 92 ## When supplied, the Hessian function must be of the form |
88 ## | 93 ## |
89 ## @example | 94 ## @example |
90 ## h = hessian (x) | 95 ## @var{h} = hessian (@var{x}) |
91 ## @end example | 96 ## @end example |
92 ## | 97 ## |
93 ## @noindent | 98 ## @noindent |
94 ## in which @var{x} is a vector and @var{h} is a matrix. | 99 ## in which @var{x} is a vector and @var{h} is a matrix. |
95 ## | 100 ## |
96 ## The third and fourth arguments are function handles pointing to | 101 ## The third and fourth arguments are function handles pointing to |
97 ## functions that compute the equality constraints and the inequality | 102 ## functions that compute the equality constraints and the inequality |
98 ## constraints, respectively. | 103 ## constraints, respectively. |
99 ## | 104 ## |
100 ## If your problem does not have equality (or inequality) constraints, | 105 ## If the problem does not have equality (or inequality) constraints, |
101 ## you may pass an empty matrix for @var{cef} (or @var{cif}). | 106 ## then use an empty matrix ([]) for @var{cef} (or @var{cif}). |
102 ## | 107 ## |
103 ## If supplied, the equality and inequality constraint functions must be | 108 ## When supplied, the equality and inequality constraint functions must be |
104 ## of the form | 109 ## of the form |
105 ## | 110 ## |
106 ## @example | 111 ## @example |
107 ## r = f (x) | 112 ## @var{r} = f (@var{x}) |
108 ## @end example | 113 ## @end example |
109 ## | 114 ## |
110 ## @noindent | 115 ## @noindent |
111 ## in which @var{x} is a vector and @var{r} is a vector. | 116 ## in which @var{x} is a vector and @var{r} is a vector. |
112 ## | 117 ## |
130 ## [ dx_1 dx_2 dx_N ] | 135 ## [ dx_1 dx_2 dx_N ] |
131 ## @end group | 136 ## @end group |
132 ## @end example | 137 ## @end example |
133 ## @end ifnottex | 138 ## @end ifnottex |
134 ## | 139 ## |
135 ## The fifth and sixth arguments are vectors containing lower and upper bounds | 140 ## The fifth and sixth arguments contain lower and upper bounds |
136 ## on @var{x}. These must be consistent with equality and inequality | 141 ## on @var{x}. These must be consistent with the equality and inequality |
137 ## constraints @var{g} and @var{h}. If the bounds are not specified, or are | 142 ## constraints @var{g} and @var{h}. If the arguments are vectors then |
138 ## empty, they are set to -@var{realmax} and @var{realmax} by default. | 143 ## @var{x}(i) is bound by @var{lb}(i) and @var{ub}(i). A bound can also |
139 ## | 144 ## be a scalar in which case all elements of @var{x} will share the same |
140 ## The seventh argument is max. number of iterations. If not specified, | 145 ## bound. If only one bound (lb, ub) is specified then the other will |
141 ## the default value is 100. | 146 ## default to (-@var{realmax}, +@var{realmax}). |
142 ## | 147 ## |
143 ## The eighth argument is tolerance for stopping criteria. If not specified, | 148 ## The seventh argument specifies the maximum number of iterations. |
144 ## the default value is @var{eps}. | 149 ## The default value is 100. |
145 ## | 150 ## |
146 ## Here is an example of calling @code{sqp}: | 151 ## The eighth argument specifies the tolerance for the stopping criteria. |
152 ## The default value is @code{eps}. | |
153 ## | |
154 ## The value returned in @var{info} may be one of the following: | |
155 ## @table @asis | |
156 ## @item 101 | |
157 ## The algorithm terminated normally. | |
158 ## Either all constraints meet the requested tolerance, or the stepsize, | |
159 ## @tex | |
160 ## $\Delta x,$ | |
161 ## @end tex | |
162 ## @ifnottex | |
163 ## delta @var{x}, | |
164 ## @end ifnottex | |
165 ## is less than @code{tol * norm (x)}. | |
166 ## @item 102 | |
167 ## The BFGS update failed. | |
168 ## @item 103 | |
169 ## The maximum number of iterations was reached. | |
170 ## @end table | |
171 ## | |
172 ## An example of calling @code{sqp}: | |
147 ## | 173 ## |
148 ## @example | 174 ## @example |
149 ## function r = g (x) | 175 ## function r = g (x) |
150 ## r = [ sumsq(x)-10; | 176 ## r = [ sumsq(x)-10; |
151 ## x(2)*x(3)-5*x(4)*x(5); | 177 ## x(2)*x(3)-5*x(4)*x(5); |
177 ## -0.0401627 | 203 ## -0.0401627 |
178 ## 0.0379578 | 204 ## 0.0379578 |
179 ## -0.0052227 | 205 ## -0.0052227 |
180 ## @end example | 206 ## @end example |
181 ## | 207 ## |
182 ## The value returned in @var{info} may be one of the following: | |
183 ## @table @asis | |
184 ## @item 101 | |
185 ## The algorithm terminated because the norm of the last step was less | |
186 ## than @code{tol * norm (x))} (the value of tol is currently fixed at | |
187 ## @code{sqrt (eps)}---edit @file{sqp.m} to modify this value. | |
188 ## @item 102 | |
189 ## The BFGS update failed. | |
190 ## @item 103 | |
191 ## The maximum number of iterations was reached (the maximum number of | |
192 ## allowed iterations is currently fixed at 100---edit @file{sqp.m} to | |
193 ## increase this value). | |
194 ## @end table | |
195 ## @seealso{qp} | 208 ## @seealso{qp} |
196 ## @end deftypefn | 209 ## @end deftypefn |
197 | 210 |
198 function [x, obj, info, iter, nf, lambda] = sqp (x, objf, cef, cif, lb, ub, maxiter, tolerance) | 211 function [x, obj, info, iter, nf, lambda] = sqp (x, objf, cef, cif, lb, ub, maxiter, tolerance) |
199 | 212 |
202 global __sqp_ce_fun__; | 215 global __sqp_ce_fun__; |
203 global __sqp_ci_fun__; | 216 global __sqp_ci_fun__; |
204 global __sqp_cif__; | 217 global __sqp_cif__; |
205 global __sqp_cifcn__; | 218 global __sqp_cifcn__; |
206 | 219 |
207 if (nargin >= 2 && nargin <= 8 && nargin != 5) | 220 if (nargin < 2 || nargin > 8 || nargin == 5) |
208 | 221 print_usage (); |
209 ## Choose an initial NxN symmetric positive definite Hessan | 222 endif |
210 ## approximation B. | 223 |
211 | 224 if (!isvector (x)) |
212 n = length (x); | 225 error ("sqp: X must be a vector"); |
213 | 226 endif |
214 ## Evaluate objective function, constraints, and gradients at initial | 227 if (rows (x) == 1) |
215 ## value of x. | 228 x = x'; |
216 ## | 229 endif |
217 ## obj_fun | 230 |
218 ## obj_grad | 231 obj_grd = @fd_obj_grd; |
219 ## ce_fun -- equality constraint functions | 232 have_hess = 0; |
220 ## ci_fun -- inequality constraint functions | 233 if (iscell (objf)) |
221 ## A == [grad_{x_1} cx_fun, grad_{x_2} cx_fun, ..., grad_{x_n} cx_fun]^T | 234 switch length (objf) |
222 | 235 case {1} |
223 obj_grd = @fd_obj_grd; | 236 obj_fun = objf{1}; |
224 have_hess = 0; | 237 case {2} |
225 if (iscell (objf)) | 238 obj_fun = objf{1}; |
226 if (length (objf) > 0) | 239 obj_grd = objf{2}; |
227 __sqp_obj_fun__ = obj_fun = objf{1}; | 240 case {3} |
228 if (length (objf) > 1) | 241 obj_fun = objf{1}; |
229 obj_grd = objf{2}; | 242 obj_grd = objf{2}; |
230 if (length (objf) > 2) | 243 obj_hess = objf{3}; |
231 obj_hess = objf{3}; | 244 have_hess = 1; |
232 have_hess = 1; | 245 otherwise |
233 endif | 246 error ("sqp: invalid objective function specification"); |
247 endswitch | |
248 else | |
249 obj_fun = objf; # No cell array, only obj_fun set | |
250 endif | |
251 __sqp_obj_fun__ = obj_fun; | |
252 | |
253 ce_fun = @empty_cf; | |
254 ce_grd = @empty_jac; | |
255 if (nargin > 2) | |
256 ce_grd = @fd_ce_jac; | |
257 if (iscell (cef)) | |
258 switch length (cef) | |
259 case {1} | |
260 ce_fun = cef{1}; | |
261 case {2} | |
262 ce_fun = cef{1}; | |
263 ce_grd = cef{2}; | |
264 otherwise | |
265 error ("sqp: invalid equality constraint function specification"); | |
266 endswitch | |
267 elseif (! isempty (cef)) | |
268 ce_fun = cef; # No cell array, only constraint equality function set | |
269 endif | |
270 endif | |
271 __sqp_ce_fun__ = ce_fun; | |
272 | |
273 ci_fun = @empty_cf; | |
274 ci_grd = @empty_jac; | |
275 if (nargin > 3) | |
276 ## constraint function given by user with possible gradient | |
277 __sqp_cif__ = cif; | |
278 ## constraint function given by user without gradient | |
279 __sqp_cifcn__ = @empty_cf; | |
280 if (iscell (cif)) | |
281 if (length (cif) > 0) | |
282 __sqp_cifcn__ = cif{1}; | |
283 endif | |
284 elseif (! isempty (cif)) | |
285 __sqp_cifcn__ = cif; | |
286 endif | |
287 | |
288 if (nargin < 5 || (nargin > 5 && isempty (lb) && isempty (ub))) | |
289 ## constraint inequality function only without any bounds | |
290 ci_grd = @fd_ci_jac; | |
291 if (iscell (cif)) | |
292 switch length (cif) | |
293 case {1} | |
294 ci_fun = cif{1}; | |
295 case {2} | |
296 ci_fun = cif{1}; | |
297 ci_grd = cif{2}; | |
298 otherwise | |
299 error ("sqp: invalid inequality constraint function specification"); | |
300 endswitch | |
301 elseif (! isempty (cif)) | |
302 ci_fun = cif; # No cell array, only constraint inequality function set | |
303 endif | |
304 else | |
305 ## constraint inequality function with bounds present | |
306 global __sqp_lb__; | |
307 if (isvector (lb)) | |
308 __sqp_lb__ = lb(:); | |
309 elseif (isempty (lb)) | |
310 if (isa (x, "single")) | |
311 __sqp_lb__ = -realmax ("single"); | |
312 else | |
313 __sqp_lb__ = -realmax; | |
234 endif | 314 endif |
235 else | 315 else |
236 error ("sqp: invalid objective function"); | 316 error ("sqp: invalid lower bound"); |
237 endif | 317 endif |
238 else | 318 |
239 __sqp_obj_fun__ = obj_fun = objf; | 319 global __sqp_ub__; |
240 endif | 320 if (isvector (ub)) |
241 | 321 __sqp_ub__ = ub(:); |
242 ce_fun = @empty_cf; | 322 elseif (isempty (ub)) |
243 ce_grd = @empty_jac; | 323 if (isa (x, "single")) |
244 if (nargin > 2) | 324 __sqp_ub__ = realmax ("single"); |
245 ce_grd = @fd_ce_jac; | |
246 if (iscell (cef)) | |
247 if (length (cef) > 0) | |
248 __sqp_ce_fun__ = ce_fun = cef{1}; | |
249 if (length (cef) > 1) | |
250 ce_grd = cef{2}; | |
251 endif | |
252 else | 325 else |
253 error ("sqp: invalid equality constraint function"); | 326 __sqp_ub__ = realmax; |
254 endif | |
255 elseif (! isempty (cef)) | |
256 ce_fun = cef; | |
257 endif | |
258 endif | |
259 __sqp_ce_fun__ = ce_fun; | |
260 | |
261 ci_fun = @empty_cf; | |
262 ci_grd = @empty_jac; | |
263 | |
264 if (nargin > 3) | |
265 ## constraint function given by user with possibly gradient | |
266 __sqp_cif__ = cif; | |
267 ## constraint function given by user without gradient | |
268 __sqp_cifcn__ = @empty_cf; | |
269 if (iscell (__sqp_cif__)) | |
270 if (length (__sqp_cif__) > 0) | |
271 __sqp_cifcn__ = __sqp_cif__{1}; | |
272 endif | |
273 elseif (! isempty (__sqp_cif__)) | |
274 __sqp_cifcn__ = __sqp_cif__; | |
275 endif | |
276 | |
277 if (nargin < 5) | |
278 ci_grd = @fd_ci_jac; | |
279 if (iscell (cif)) | |
280 if (length (cif) > 0) | |
281 __sqp_ci_fun__ = ci_fun = cif{1}; | |
282 if (length (cif) > 1) | |
283 ci_grd = cif{2}; | |
284 endif | |
285 else | |
286 error ("sqp: invalid equality constraint function"); | |
287 endif | |
288 elseif (! isempty (cif)) | |
289 ci_fun = cif; | |
290 endif | 327 endif |
291 else | 328 else |
292 global __sqp_lb__; | 329 error ("sqp: invalid upper bound"); |
293 if (isvector (lb)) | |
294 __sqp_lb__ = lb; | |
295 elseif (isempty (lb)) | |
296 if (isa (x, "single")) | |
297 __sqp_lb__ = -realmax ("single"); | |
298 else | |
299 __sqp_lb__ = -realmax; | |
300 endif | |
301 else | |
302 error ("sqp: invalid lower bound"); | |
303 endif | |
304 | |
305 global __sqp_ub__; | |
306 if (isvector (ub)) | |
307 __sqp_ub__ = ub; | |
308 elseif (isempty (lb)) | |
309 if (isa (x, "single")) | |
310 __sqp_ub__ = realmax ("single"); | |
311 else | |
312 __sqp_ub__ = realmax; | |
313 endif | |
314 else | |
315 error ("sqp: invalid upper bound"); | |
316 endif | |
317 | |
318 if (lb > ub) | |
319 error ("sqp: upper bound smaller than lower bound"); | |
320 endif | |
321 __sqp_ci_fun__ = ci_fun = @cf_ub_lb; | |
322 ci_grd = @cigrad_ub_lb; | |
323 endif | 330 endif |
324 __sqp_ci_fun__ = ci_fun; | 331 |
325 endif | 332 if (__sqp_lb__ > __sqp_ub__) |
326 | 333 error ("sqp: upper bound smaller than lower bound"); |
327 iter_max = 100; | 334 endif |
328 if (nargin > 6 && ! isempty (maxiter)) | 335 ci_fun = @cf_ub_lb; |
329 if (isscalar (maxiter) && maxiter > 0 && round (maxiter) == maxiter) | 336 ci_grd = @cigrad_ub_lb; |
330 iter_max = maxiter; | 337 endif |
338 | |
339 __sqp_ci_fun__ = ci_fun; | |
340 endif # if (nargin > 3) | |
341 | |
342 iter_max = 100; | |
343 if (nargin > 6 && ! isempty (maxiter)) | |
344 if (isscalar (maxiter) && maxiter > 0 && fix (maxiter) == maxiter) | |
345 iter_max = maxiter; | |
346 else | |
347 error ("sqp: invalid number of maximum iterations"); | |
348 endif | |
349 endif | |
350 | |
351 tol = sqrt (eps); | |
352 if (nargin > 7 && ! isempty (tolerance)) | |
353 if (isscalar (tolerance) && tolerance > 0) | |
354 tol = tolerance; | |
355 else | |
356 error ("sqp: invalid value for tolerance"); | |
357 endif | |
358 endif | |
359 | |
360 ## Evaluate objective function, constraints, and gradients at initial | |
361 ## value of x. | |
362 ## | |
363 ## obj_fun -- objective function | |
364 ## obj_grad -- objective gradient | |
365 ## ce_fun -- equality constraint functions | |
366 ## ci_fun -- inequality constraint functions | |
367 ## A == [grad_{x_1} cx_fun, grad_{x_2} cx_fun, ..., grad_{x_n} cx_fun]^T | |
368 | |
369 obj = feval (obj_fun, x); | |
370 __sqp_nfun__ = 1; | |
371 | |
372 c = feval (obj_grd, x); | |
373 | |
374 ## Choose an initial NxN symmetric positive definite Hessan | |
375 ## approximation B. | |
376 n = length (x); | |
377 if (have_hess) | |
378 B = feval (obj_hess, x); | |
379 else | |
380 B = eye (n, n); | |
381 endif | |
382 | |
383 ce = feval (ce_fun, x); | |
384 F = feval (ce_grd, x); | |
385 | |
386 ci = feval (ci_fun, x); | |
387 C = feval (ci_grd, x); | |
388 | |
389 A = [F; C]; | |
390 | |
391 ## Choose an initial lambda (x is provided by the caller). | |
392 | |
393 lambda = 100 * ones (rows (A), 1); | |
394 | |
395 qp_iter = 1; | |
396 alpha = 1; | |
397 | |
398 # report (); | |
399 | |
400 # report (iter, qp_iter, alpha, __sqp_nfun__, obj); | |
401 | |
402 info = 0; | |
403 iter = 0; | |
404 | |
405 while (++iter < iter_max) | |
406 | |
407 ## Check convergence. This is just a simple check on the first | |
408 ## order necessary conditions. | |
409 | |
410 ## idx is the indices of the active inequality constraints. | |
411 | |
412 nr_f = rows (F); | |
413 | |
414 lambda_e = lambda((1:nr_f)'); | |
415 lambda_i = lambda((nr_f+1:end)'); | |
416 | |
417 con = [ce; ci]; | |
418 | |
419 t0 = norm (c - A' * lambda); | |
420 t1 = norm (ce); | |
421 t2 = all (ci >= 0); | |
422 t3 = all (lambda_i >= 0); | |
423 t4 = norm (lambda .* con); | |
424 | |
425 if (t2 && t3 && max ([t0; t1; t4]) < tol) | |
426 info = 101; | |
427 break; | |
428 endif | |
429 | |
430 ## Compute search direction p by solving QP. | |
431 | |
432 g = -ce; | |
433 d = -ci; | |
434 | |
435 ## Discard inequality constraints that have -Inf bounds since those | |
436 ## will never be active. | |
437 idx = isinf (d) & d < 0; | |
438 d(idx) = []; | |
439 C(idx,:) = []; | |
440 | |
441 [p, obj_qp, INFO, lambda] = qp (x, B, c, F, g, [], [], d, C, | |
442 Inf (size (d))); | |
443 | |
444 info = INFO.info; | |
445 | |
446 ## Check QP solution and attempt to recover if it has failed. | |
447 | |
448 ## Choose mu such that p is a descent direction for the chosen | |
449 ## merit function phi. | |
450 | |
451 [x_new, alpha, obj_new] = linesearch_L1 (x, p, obj_fun, obj_grd, | |
452 ce_fun, ci_fun, lambda, obj); | |
453 | |
454 ## Evaluate objective function, constraints, and gradients at | |
455 ## x_new. | |
456 | |
457 c_new = feval (obj_grd, x_new); | |
458 | |
459 ce_new = feval (ce_fun, x_new); | |
460 F_new = feval (ce_grd, x_new); | |
461 | |
462 ci_new = feval (ci_fun, x_new); | |
463 C_new = feval (ci_grd, x_new); | |
464 | |
465 A_new = [F_new; C_new]; | |
466 | |
467 ## Set | |
468 ## | |
469 ## s = alpha * p | |
470 ## y = grad_x L (x_new, lambda) - grad_x L (x, lambda}) | |
471 | |
472 y = c_new - c; | |
473 | |
474 if (! isempty (A)) | |
475 t = ((A_new - A)'*lambda); | |
476 y -= t; | |
477 endif | |
478 | |
479 delx = x_new - x; | |
480 | |
481 if (norm (delx) < tol * norm (x)) | |
482 info = 101; | |
483 break; | |
484 endif | |
485 | |
486 if (have_hess) | |
487 | |
488 B = feval (obj_hess, x); | |
489 | |
490 else | |
491 | |
492 ## Update B using a quasi-Newton formula. | |
493 | |
494 delxt = delx'; | |
495 | |
496 ## Damped BFGS. Or maybe we would actually want to use the Hessian | |
497 ## of the Lagrangian, computed directly. | |
498 | |
499 d1 = delxt*B*delx; | |
500 | |
501 t1 = 0.2 * d1; | |
502 t2 = delxt*y; | |
503 | |
504 if (t2 < t1) | |
505 theta = 0.8*d1/(d1 - t2); | |
331 else | 506 else |
332 error ("sqp: invalid number of maximum iterations"); | 507 theta = 1; |
333 endif | 508 endif |
334 endif | 509 |
335 | 510 r = theta*y + (1-theta)*B*delx; |
336 tol = sqrt (eps); | 511 |
337 if (nargin > 7 && ! isempty (tolerance)) | 512 d2 = delxt*r; |
338 if (isscalar (tolerance) && tolerance > 0) | 513 |
339 tol = tolerance; | 514 if (d1 == 0 || d2 == 0) |
340 else | 515 info = 102; |
341 error ("sqp: invalid value for tolerance"); | |
342 endif | |
343 endif | |
344 | |
345 iter = 0; | |
346 | |
347 obj = feval (obj_fun, x); | |
348 __sqp_nfun__ = 1; | |
349 | |
350 c = feval (obj_grd, x); | |
351 | |
352 if (have_hess) | |
353 B = feval (obj_hess, x); | |
354 else | |
355 B = eye (n, n); | |
356 endif | |
357 | |
358 ce = feval (ce_fun, x); | |
359 F = feval (ce_grd, x); | |
360 | |
361 ci = feval (ci_fun, x); | |
362 C = feval (ci_grd, x); | |
363 | |
364 A = [F; C]; | |
365 | |
366 ## Choose an initial lambda (x is provided by the caller). | |
367 | |
368 lambda = 100 * ones (rows (A), 1); | |
369 | |
370 qp_iter = 1; | |
371 alpha = 1; | |
372 | |
373 ## report (); | |
374 | |
375 ## report (iter, qp_iter, alpha, __sqp_nfun__, obj); | |
376 | |
377 info = 0; | |
378 | |
379 while (++iter < iter_max) | |
380 | |
381 ## Check convergence. This is just a simple check on the first | |
382 ## order necessary conditions. | |
383 | |
384 ## IDX is the indices of the active inequality constraints. | |
385 | |
386 nr_f = rows (F); | |
387 | |
388 lambda_e = lambda((1:nr_f)'); | |
389 lambda_i = lambda((nr_f+1:end)'); | |
390 | |
391 con = [ce; ci]; | |
392 | |
393 t0 = norm (c - A' * lambda); | |
394 t1 = norm (ce); | |
395 t2 = all (ci >= 0); | |
396 t3 = all (lambda_i >= 0); | |
397 t4 = norm (lambda .* con); | |
398 | |
399 if (t2 && t3 && max ([t0; t1; t4]) < tol) | |
400 info = 101; | |
401 break; | 516 break; |
402 endif | 517 endif |
403 | 518 |
404 ## Compute search direction p by solving QP. | 519 B = B - B*delx*delxt*B/d1 + r*r'/d2; |
405 | 520 |
406 g = -ce; | 521 endif |
407 d = -ci; | 522 |
408 | 523 x = x_new; |
409 ## Discard inequality constraints that have -Inf bounds since those | 524 |
410 ## will never be active. | 525 obj = obj_new; |
411 idx = isinf (d) & d < 0; | 526 |
412 d(idx) = []; | 527 c = c_new; |
413 C(idx,:) = []; | 528 |
414 | 529 ce = ce_new; |
415 [p, obj_qp, INFO, lambda] = qp (x, B, c, F, g, [], [], d, C, | 530 F = F_new; |
416 Inf (size (d))); | 531 |
417 | 532 ci = ci_new; |
418 info = INFO.info; | 533 C = C_new; |
419 | 534 |
420 ## Check QP solution and attempt to recover if it has failed. | 535 A = A_new; |
421 | 536 |
422 ## Choose mu such that p is a descent direction for the chosen | 537 # report (iter, qp_iter, alpha, __sqp_nfun__, obj); |
423 ## merit function phi. | 538 |
424 | 539 endwhile |
425 [x_new, alpha, obj_new] = linesearch_L1 (x, p, obj_fun, obj_grd, | 540 |
426 ce_fun, ci_fun, lambda, obj); | 541 if (iter >= iter_max) |
427 | 542 info = 103; |
428 ## Evaluate objective function, constraints, and gradients at | 543 endif |
429 ## x_new. | 544 |
430 | 545 nf = __sqp_nfun__; |
431 c_new = feval (obj_grd, x_new); | |
432 | |
433 ce_new = feval (ce_fun, x_new); | |
434 F_new = feval (ce_grd, x_new); | |
435 | |
436 ci_new = feval (ci_fun, x_new); | |
437 C_new = feval (ci_grd, x_new); | |
438 | |
439 A_new = [F_new; C_new]; | |
440 | |
441 ## Set | |
442 ## | |
443 ## s = alpha * p | |
444 ## y = grad_x L (x_new, lambda) - grad_x L (x, lambda}) | |
445 | |
446 y = c_new - c; | |
447 | |
448 if (! isempty (A)) | |
449 t = ((A_new - A)'*lambda); | |
450 y -= t; | |
451 endif | |
452 | |
453 delx = x_new - x; | |
454 | |
455 if (norm (delx) < tol * norm (x)) | |
456 info = 101; | |
457 break; | |
458 endif | |
459 | |
460 if (have_hess) | |
461 | |
462 B = feval (obj_hess, x); | |
463 | |
464 else | |
465 | |
466 ## Update B using a quasi-Newton formula. | |
467 | |
468 delxt = delx'; | |
469 | |
470 ## Damped BFGS. Or maybe we would actually want to use the Hessian | |
471 ## of the Lagrangian, computed directly. | |
472 | |
473 d1 = delxt*B*delx; | |
474 | |
475 t1 = 0.2 * d1; | |
476 t2 = delxt*y; | |
477 | |
478 if (t2 < t1) | |
479 theta = 0.8*d1/(d1 - t2); | |
480 else | |
481 theta = 1; | |
482 endif | |
483 | |
484 r = theta*y + (1-theta)*B*delx; | |
485 | |
486 d2 = delxt*r; | |
487 | |
488 if (d1 == 0 || d2 == 0) | |
489 info = 102; | |
490 break; | |
491 endif | |
492 | |
493 B = B - B*delx*delxt*B/d1 + r*r'/d2; | |
494 | |
495 endif | |
496 | |
497 x = x_new; | |
498 | |
499 obj = obj_new; | |
500 | |
501 c = c_new; | |
502 | |
503 ce = ce_new; | |
504 F = F_new; | |
505 | |
506 ci = ci_new; | |
507 C = C_new; | |
508 | |
509 A = A_new; | |
510 | |
511 ## report (iter, qp_iter, alpha, __sqp_nfun__, obj); | |
512 | |
513 endwhile | |
514 | |
515 if (iter >= iter_max) | |
516 info = 103; | |
517 endif | |
518 | |
519 nf = __sqp_nfun__; | |
520 | |
521 else | |
522 | |
523 print_usage (); | |
524 | |
525 endif | |
526 | 546 |
527 endfunction | 547 endfunction |
528 | 548 |
529 | 549 |
530 function [merit, obj] = phi_L1 (obj, obj_fun, ce_fun, ci_fun, x, mu) | 550 function [merit, obj] = phi_L1 (obj, obj_fun, ce_fun, ci_fun, x, mu) |
593 [p1, obj] = phi_L1 ([], obj_fun, ce_fun, ci_fun, x+alpha*p, mu); | 613 [p1, obj] = phi_L1 ([], obj_fun, ce_fun, ci_fun, x+alpha*p, mu); |
594 p2 = phi_x_mu+eta*alpha*D_phi_x_mu; | 614 p2 = phi_x_mu+eta*alpha*D_phi_x_mu; |
595 if (p1 > p2) | 615 if (p1 > p2) |
596 ## Reset alpha = tau_alpha * alpha for some tau_alpha in the | 616 ## Reset alpha = tau_alpha * alpha for some tau_alpha in the |
597 ## range (0, tau). | 617 ## range (0, tau). |
598 tau_alpha = 0.9 * tau; ## ?? | 618 tau_alpha = 0.9 * tau; # ?? |
599 alpha = tau_alpha * alpha; | 619 alpha = tau_alpha * alpha; |
600 else | 620 else |
601 break; | 621 break; |
602 endif | 622 endif |
603 endwhile | 623 endwhile |
604 | |
605 ## Set x_new = x + alpha * p; | |
606 | 624 |
607 x_new = x + alpha * p; | 625 x_new = x + alpha * p; |
608 | 626 |
609 endfunction | 627 endfunction |
610 | 628 |
741 %! | 759 %! |
742 %!function obj = phi (x) | 760 %!function obj = phi (x) |
743 %! obj = exp(prod(x)) - 0.5*(x(1)^3+x(2)^3+1)^2; | 761 %! obj = exp(prod(x)) - 0.5*(x(1)^3+x(2)^3+1)^2; |
744 %! | 762 %! |
745 %!test | 763 %!test |
764 %! | |
746 %! x0 = [-1.8; 1.7; 1.9; -0.8; -0.8]; | 765 %! x0 = [-1.8; 1.7; 1.9; -0.8; -0.8]; |
747 %! | 766 %! |
748 %! [x, obj, info, iter, nf, lambda] = sqp (x0, @phi, @g, []); | 767 %! [x, obj, info, iter, nf, lambda] = sqp (x0, @phi, @g, []); |
749 %! | 768 %! |
750 %! x_opt = [-1.717143501952599; | 769 %! x_opt = [-1.717143501952599; |
754 %! -0.763643068453300]; | 773 %! -0.763643068453300]; |
755 %! | 774 %! |
756 %! obj_opt = 0.0539498477702739; | 775 %! obj_opt = 0.0539498477702739; |
757 %! | 776 %! |
758 %! assert (all (abs (x-x_opt) < 5*sqrt (eps)) && abs (obj-obj_opt) < sqrt (eps)); | 777 %! assert (all (abs (x-x_opt) < 5*sqrt (eps)) && abs (obj-obj_opt) < sqrt (eps)); |
778 | |
779 %% Test input validation | |
780 %!error sqp () | |
781 %!error sqp (1) | |
782 %!error sqp (1,2,3,4,5,6,7,8,9) | |
783 %!error sqp (1,2,3,4,5) | |
784 %!error sqp (ones(2,2)) | |
785 %!error sqp (1,cell(4,1)) | |
786 %!error sqp (1,cell(3,1),cell(3,1)) | |
787 %!error sqp (1,cell(3,1),cell(2,1),cell(3,1)) | |
788 %!error sqp (1,cell(3,1),cell(2,1),cell(2,1),ones(2,2),[]) | |
789 %!error sqp (1,cell(3,1),cell(2,1),cell(2,1),[],ones(2,2)) | |
790 %!error sqp (1,cell(3,1),cell(2,1),cell(2,1),1,-1) | |
791 %!error sqp (1,cell(3,1),cell(2,1),cell(2,1),[],[],ones(2,2)) | |
792 %!error sqp (1,cell(3,1),cell(2,1),cell(2,1),[],[],-1) | |
793 %!error sqp (1,cell(3,1),cell(2,1),cell(2,1),[],[],1.5) | |
794 %!error sqp (1,cell(3,1),cell(2,1),cell(2,1),[],[],[],ones(2,2)) | |
795 %!error sqp (1,cell(3,1),cell(2,1),cell(2,1),[],[],[],-1) |