5289
|
1 ## Copyright (C) 2005 John W. Eaton |
|
2 ## |
|
3 ## This file is part of Octave. |
|
4 ## |
|
5 ## Octave is free software; you can redistribute it and/or modify it |
|
6 ## under the terms of the GNU General Public License as published by |
|
7 ## the Free Software Foundation; either version 2, or (at your option) |
|
8 ## any later version. |
|
9 ## |
|
10 ## Octave is distributed in the hope that it will be useful, but |
|
11 ## WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
13 ## General Public License for more details. |
|
14 ## |
|
15 ## You should have received a copy of the GNU General Public License |
|
16 ## along with Octave; see the file COPYING. If not, write to the Free |
5307
|
17 ## Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
|
18 ## 02110-1301, USA. |
5289
|
19 |
|
20 ## -*- texinfo -*- |
|
21 ## @deftypefn {Function File} {[@var{x}, @var{obj}, @var{info}, @var{iter}, @var{nf}, @var{lambda}] =} sqp (@var{x}, @var{phi}, @var{g}, @var{h}) |
|
22 ## Solve the nonlinear program |
|
23 ## @ifinfo |
|
24 ## |
|
25 ## @example |
|
26 ## min phi (x) |
|
27 ## x |
|
28 ## @end example |
|
29 ## |
|
30 ## @end ifinfo |
|
31 ## @iftex |
|
32 ## @tex |
|
33 ## @end tex |
|
34 ## @end iftex |
|
35 ## subject to |
|
36 ## @ifinfo |
|
37 ## |
|
38 ## @example |
|
39 ## g(x) = 0 |
|
40 ## h(x) >= 0 |
|
41 ## @end example |
|
42 ## @end ifinfo |
|
43 ## @iftex |
|
44 ## @tex |
|
45 ## @end tex |
|
46 ## @end iftex |
|
47 ## |
|
48 ## @noindent |
|
49 ## using a successive quadratic programming method. |
|
50 ## |
|
51 ## The first argument is the initial guess for the vector @var{x}. |
|
52 ## |
|
53 ## The second argument is a function handle pointing to the objective |
|
54 ## function. The objective function must be of the form |
|
55 ## |
|
56 ## @example |
|
57 ## y = phi (x) |
|
58 ## @end example |
|
59 ## |
|
60 ## @noindent |
|
61 ## in which @var{x} is a vector and @var{y} is a scalar. |
|
62 ## |
|
63 ## The second argument may also be a 2- or 3-element cell array of |
|
64 ## function handles. The first element should point to the objective |
|
65 ## function, the second should point to a function that computes the |
|
66 ## gradient of the objective function, and the third should point to a |
|
67 ## function to compute the hessian of the objective function. If the |
|
68 ## gradient function is not supplied, the gradient is computed by finite |
|
69 ## differences. If the hessian function is not supplied, a BFGS update |
|
70 ## formula is used to approximate the hessian. |
|
71 ## |
|
72 ## If supplied, the gradient function must be of the form |
|
73 ## |
|
74 ## @example |
|
75 ## g = gradient (x) |
|
76 ## @end example |
|
77 ## |
|
78 ## @noindent |
|
79 ## in which @var{x} is a vector and @var{g} is a vector. |
|
80 ## |
|
81 ## If supplied, the hessian function must be of the form |
|
82 ## |
|
83 ## @example |
|
84 ## h = hessian (x) |
|
85 ## @end example |
|
86 ## |
|
87 ## @noindent |
|
88 ## in which @var{x} is a vector and @var{h} is a matrix. |
|
89 ## |
|
90 ## The third and fourth arguments are function handles pointing to |
|
91 ## functions that compute the equality constraints and the inequality |
|
92 ## constraints, respectively. |
|
93 ## |
|
94 ## If your problem does not have equality (or inequality) constraints, |
|
95 ## you may pass an empty matrix for @var{g} (or @var{h}). |
|
96 ## |
|
97 ## If supplied, the equality and inequality constraint functions must be |
|
98 ## of the form |
|
99 ## |
|
100 ## @example |
|
101 ## r = f (x) |
|
102 ## @end example |
|
103 ## |
|
104 ## @noindent |
|
105 ## in which @var{x} is a vector and @var{r} is a vector. |
|
106 ## |
|
107 ## The third and fourth arguments may also be 2-element cell arrays of |
|
108 ## function handles. The first element should point to the constraint |
|
109 ## function and the second should point to a function that computes the |
|
110 ## gradient of the constraint function: |
|
111 ## |
|
112 ## @example |
|
113 ## [ d f(x) d f(x) d f(x) ] |
|
114 ## transpose ( [ ------ ----- ... ------ ] ) |
|
115 ## [ dx_1 dx_2 dx_N ] |
|
116 ## @end example |
|
117 ## |
|
118 ## Here is an example of calling @code{sqp}: |
|
119 ## |
|
120 ## @example |
|
121 ## function r = g (x) |
|
122 ## r = [ sumsq(x)-10; x(2)*x(3)-5*x(4)*x(5); x(1)^3+x(2)^3+1]; |
|
123 ## endfunction |
|
124 ## |
|
125 ## function obj = phi (x) |
|
126 ## obj = exp(prod(x)) - 0.5*(x(1)^3+x(2)^3+1)^2; |
|
127 ## endfunction |
|
128 ## |
|
129 ## x0 = [-1.8; 1.7; 1.9; -0.8; -0.8]; |
|
130 ## |
|
131 ## [x, obj, info, iter, nf, lambda] = sqp (x0, @@phi, @@g, []) |
|
132 ## |
|
133 ## x = |
|
134 ## |
|
135 ## -1.71714 |
|
136 ## 1.59571 |
|
137 ## 1.82725 |
|
138 ## -0.76364 |
|
139 ## -0.76364 |
|
140 ## |
|
141 ## obj = 0.053950 |
|
142 ## info = 101 |
|
143 ## iter = 8 |
|
144 ## nf = 10 |
|
145 ## lambda = |
|
146 ## |
|
147 ## -0.0401627 |
|
148 ## 0.0379578 |
|
149 ## -0.0052227 |
|
150 ## @end example |
|
151 ## |
|
152 ## The value returned in @var{info} may be one of the following: |
|
153 ## @table @asis |
|
154 ## @item 101 |
|
155 ## The algorithm terminated because the norm of the last step was less |
|
156 ## than @code{tol * norm (x)} (the value of tol is currently fixed at |
|
157 ## @code{sqrt (eps)}---edit @file{sqp.m} to modify this value). |
|
158 ## @item 102 |
|
159 ## The BFGS update failed. |
|
160 ## @item 103 |
|
161 ## The maximum number of iterations was reached (the maximum number of |
|
162 ## allowed iterations is currently fixed at 100---edit @file{sqp.m} to |
|
163 ## increase this value). |
|
164 ## @end table |
5642
|
165 ## @seealso{qp} |
5289
|
166 ## @end deftypefn |
|
167 |
|
function [x, obj, info, iter, nf, lambda] = sqp (x, objf, cef, cif)

  ## Main driver of the successive quadratic programming iteration.
  ##
  ## Inputs:
  ##   x    -- initial guess.
  ##   objf -- objective function handle, or a cell array
  ##           {objective, gradient, hessian} whose 2nd and 3rd elements
  ##           are optional.
  ##   cef  -- equality constraint handle, a cell array
  ##           {constraint, jacobian}, or empty (optional argument).
  ##   cif  -- inequality constraint handle, a cell array
  ##           {constraint, jacobian}, or empty (optional argument).
  ##
  ## Outputs:
  ##   x, obj -- last accepted iterate and the objective value there.
  ##   info   -- 101 (step smaller than tol * norm (x)), 102 (BFGS update
  ##             failed), 103 (iteration limit reached); otherwise the
  ##             info field reported by the last call to qp, or 0 if the
  ##             convergence test passed before any QP was solved.
  ##   iter   -- value of the iteration counter on exit.
  ##   nf     -- value of the global evaluation counter nfun on exit.
  ##   lambda -- multipliers from the last QP (or the initial guess if no
  ##             QP was solved).

  global nfun;
  global __sqp_obj_fun__;
  global __sqp_ce_fun__;
  global __sqp_ci_fun__;

  if (nargin >= 2 && nargin <= 4)

    ## Choose an initial NxN symmetric positive definite Hessian
    ## approximation B.

    n = length (x);

    ## Evaluate objective function, constraints, and gradients at initial
    ## value of x.
    ##
    ## obj_fun
    ## obj_grad
    ## ce_fun -- equality constraint functions
    ## ci_fun -- inequality constraint functions
    ## A == [grad_{x_1} cx_fun, grad_{x_2} cx_fun, ..., grad_{x_n} cx_fun]^T

    ## Objective: the gradient defaults to finite differences and the
    ## Hessian to a BFGS approximation unless the caller supplies them.
    obj_grd = @fd_obj_grd;
    have_hess = 0;
    if (iscell (objf))
      if (length (objf) > 0)
        __sqp_obj_fun__ = obj_fun = objf{1};
        if (length (objf) > 1)
          obj_grd = objf{2};
          if (length (objf) > 2)
            obj_hess = objf{3};
            have_hess = 1;
          endif
        endif
      else
        error ("sqp: invalid objective function");
      endif
    else
      __sqp_obj_fun__ = obj_fun = objf;
    endif

    ## Equality constraints.  The global handle is what the
    ## finite-difference Jacobian wrapper differentiates.
    ce_fun = @empty_cf;
    ce_grd = @empty_jac;
    if (nargin > 2)
      ce_grd = @fd_ce_jac;
      if (iscell (cef))
        if (length (cef) > 0)
          ce_fun = cef{1};
          if (length (cef) > 1)
            ce_grd = cef{2};
          endif
        else
          error ("sqp: invalid equality constraint function");
        endif
      elseif (! isempty (cef))
        ce_fun = cef;
      endif
    endif
    __sqp_ce_fun__ = ce_fun;

    ## Inequality constraints, handled the same way.
    ci_fun = @empty_cf;
    ci_grd = @empty_jac;
    if (nargin > 3)
      ci_grd = @fd_ci_jac;
      if (iscell (cif))
        if (length (cif) > 0)
          ci_fun = cif{1};
          if (length (cif) > 1)
            ci_grd = cif{2};
          endif
        else
          error ("sqp: invalid inequality constraint function");
        endif
      elseif (! isempty (cif))
        ci_fun = cif;
      endif
    endif
    __sqp_ci_fun__ = ci_fun;

    iter_max = 100;

    iter = 0;

    ## Make sure info is defined even when the convergence test below
    ## succeeds before the first QP has been solved.
    info = 0;

    obj = feval (obj_fun, x);
    nfun = 1;

    c = feval (obj_grd, x);

    if (have_hess)
      B = feval (obj_hess, x);
    else
      B = eye (n, n);
    endif

    ce = feval (ce_fun, x);
    F = feval (ce_grd, x);

    ci = feval (ci_fun, x);
    C = feval (ci_grd, x);

    A = [F; C];

    ## Choose an initial lambda (x is provided by the caller).

    lambda = 100 * ones (rows (A), 1);

    ## qp_iter and alpha are only consumed by the (disabled) report calls.
    qp_iter = 1;
    alpha = 1;

    ## report ();

    ## report (iter, qp_iter, alpha, nfun, obj);

    while (++iter < iter_max)

      ## Check convergence.  This is just a simple check on the first
      ## order necessary conditions.

      nr_f = rows (F);

      lambda_i = lambda((nr_f+1:end)');

      con = [ce; ci];

      t0 = norm (c - A' * lambda);
      t1 = norm (ce);
      t2 = all (ci >= 0);
      t3 = all (lambda_i >= 0);
      t4 = norm (lambda .* con);

      tol = sqrt (eps);

      if (t2 && t3 && max ([t0; t1; t4]) < tol)
        break;
      endif

      ## Compute search direction p by solving QP.

      g = -ce;
      d = -ci;

      ## Discard inequality constraints that have -Inf bounds since those
      ## will never be active.
      ## NOTE(review): rows removed here are not removed from A, so the
      ## lambda returned by qp may have fewer rows than A -- confirm.
      idx = isinf (d) & d < 0;
      d(idx) = [];
      C(idx,:) = [];

      [p, obj_qp, INFO, lambda] = qp (x, B, c, F, g, [], [], d, C, ...
                                      Inf * ones (size (d)));

      info = INFO.info;

      ## Check QP solution and attempt to recover if it has failed.
      ## (No recovery is implemented here.)

      ## Choose mu such that p is a descent direction for the chosen
      ## merit function phi.

      [x_new, alpha, obj_new] = linesearch_L1 (x, p, obj_fun, obj_grd, ...
                                               ce_fun, ci_fun, lambda, obj);

      ## Evaluate objective function, constraints, and gradients at
      ## x_new.

      c_new = feval (obj_grd, x_new);

      ce_new = feval (ce_fun, x_new);
      F_new = feval (ce_grd, x_new);

      ci_new = feval (ci_fun, x_new);
      C_new = feval (ci_grd, x_new);

      A_new = [F_new; C_new];

      ## Set
      ##
      ## s = alpha * p
      ## y = grad_x L (x_new, lambda) - grad_x L (x, lambda)

      y = c_new - c;

      if (! isempty (A))
        t = ((A_new - A)'*lambda);
        y -= t;
      endif

      delx = x_new - x;

      ## Stop when the step is negligible relative to the current iterate;
      ## x is left at its previous value in that case.
      if (norm (delx) < tol * norm (x))
        info = 101;
        break;
      endif

      if (have_hess)

        ## NOTE(review): the Hessian is evaluated at the old x, not at
        ## x_new -- confirm this lag is intended.
        B = feval (obj_hess, x);

      else

        ## Update B using a quasi-Newton formula.

        delxt = delx';

        ## Damped BFGS.  Or maybe we would actually want to use the Hessian
        ## of the Lagrangian, computed directly.

        d1 = delxt*B*delx;

        t1 = 0.2 * d1;
        t2 = delxt*y;

        if (t2 < t1)
          theta = 0.8*d1/(d1 - t2);
        else
          theta = 1;
        endif

        r = theta*y + (1-theta)*B*delx;

        d2 = delxt*r;

        ## Either denominator vanishing would make the update undefined.
        if (d1 == 0 || d2 == 0)
          info = 102;
          break;
        endif

        B = B - B*delx*delxt*B/d1 + r*r'/d2;

      endif

      x = x_new;

      obj = obj_new;

      c = c_new;

      ce = ce_new;
      F = F_new;

      ci = ci_new;
      C = C_new;

      A = A_new;

      ## report (iter, qp_iter, alpha, nfun, obj);

    endwhile

    if (iter >= iter_max)
      info = 103;
    endif

    nf = nfun;

  else

    print_usage ();

  endif

### endfunction
|
432 |
|
433 |
|
function [merit, obj] = phi_L1 (obj, obj_fun, ce_fun, ci_fun, x, mu)

  ## L1 merit function at x: the objective plus the 1-norm of the
  ## equality constraint values and of the negative (violated)
  ## inequality constraint values, divided by mu.
  ##
  ## If obj is passed in empty, the objective is evaluated here and the
  ## global counter nfun is incremented; otherwise the supplied value is
  ## reused.  The objective value used is returned as the second output.

  global nfun;

  eq_val = feval (ce_fun, x);
  ineq_val = feval (ci_fun, x);

  ## Only inequality constraints with negative values contribute.
  violated = ineq_val(ineq_val < 0);

  if (isempty (obj))
    obj = feval (obj_fun, x);
    nfun += 1;
  endif

  penalty = norm ([eq_val; violated], 1) / mu;

  if (isempty (penalty))
    merit = obj;
  else
    merit = obj + penalty;
  endif

### endfunction
|
458 |
|
459 |
|
function [x_new, alpha, obj] = linesearch_L1 (x, p, obj_fun, obj_grd, ...
                                              ce_fun, ci_fun, lambda, obj)

  ## Backtracking line search along p on the L1 merit function phi_L1.
  ##
  ## Starting from alpha = 1, the step length is shrunk by a fixed factor
  ## until the merit value at x + alpha*p no longer exceeds
  ## phi (x) + eta * alpha * D, where D is the slope estimate built below.
  ## Returns the accepted point, the accepted alpha, and the objective
  ## value reported by phi_L1 at that point.

  ## Parameters:
  ##
  ##   eta in the range (0, 0.5)
  ##   tau in the range (0, 1)

  eta = 0.25;
  tau = 0.5;

  ## Shrink factor tau_alpha, chosen in the range (0, tau).
  shrink = 0.9 * tau;

  delta_bar = sqrt (eps);

  ## Penalty parameter derived from the largest multiplier magnitude.
  if (isempty (lambda))
    mu = 1 / delta_bar;
  else
    mu = 1 / (norm (lambda, Inf) + delta_bar);
  endif

  grad_x = feval (obj_grd, x);
  eq_val = feval (ce_fun, x);

  [merit_x, obj] = phi_L1 (obj, obj_fun, ce_fun, ci_fun, x, mu);

  ## Slope estimate: objective slope along p minus the scaled constraint
  ## violation (only violated inequality constraints are included).
  slope = grad_x' * p;
  ineq_val = feval (ci_fun, x);
  violation = norm ([eq_val; ineq_val(ineq_val < 0)], 1) / mu;
  if (! isempty (violation))
    slope -= violation;
  endif

  alpha = 1;

  while (true)
    x_new = x + alpha * p;
    [merit_trial, obj] = phi_L1 ([], obj_fun, ce_fun, ci_fun, x_new, mu);
    merit_bound = merit_x+eta*alpha*slope;
    if (merit_trial > merit_bound)
      ## Insufficient decrease: shorten the step and try again.
      alpha = shrink * alpha;
      continue;
    endif
    break;
  endwhile

### endfunction
|
514 |
|
515 |
|
function report (iter, qp_iter, alpha, nfun, obj)

  ## Print iteration progress.  Called with no arguments it prints the
  ## column header; otherwise it prints one formatted row of values.

  if (nargin != 0)
    printf ("%5d %4d %8.1g %5d %13.6e\n", iter, qp_iter, alpha, nfun, obj);
    return;
  endif

  printf (" Itn ItQP Step Nfun Objective\n");

### endfunction
|
525 |
|
526 |
|
function grd = fdgrd (f, x)

  ## Forward-difference approximation of the gradient of f at x, using a
  ## fixed step of sqrt (eps) in each component.  Returns a column of
  ## length (x) entries, or a 0x1 matrix when f is empty.

  if (isempty (f))
    grd = zeros (0, 1);
    return;
  endif

  step = sqrt (eps);
  base = feval (f, x);
  ncomp = length (x);
  grd = zeros (ncomp, 1);

  for k = 1:ncomp
    ## Perturb only the k-th component of a fresh copy of x.
    shifted = x;
    shifted(k) += step;
    grd(k) = (feval (f, shifted) - base) / step;
  endfor

### endfunction
|
545 |
|
546 |
|
function jac = fdjac (f, x)

  ## Forward-difference approximation of the Jacobian of f at x, using a
  ## fixed step of sqrt (eps) in each component.  Column i holds the
  ## difference quotient for a perturbation of x(i).  Returns a matrix
  ## with length (f (x)) rows and length (x) columns, or a
  ## 0-by-length (x) matrix when f is empty.

  ## Computed before branching so the empty case below can size its
  ## result.
  nx = length (x);

  if (! isempty (f))
    y0 = feval (f, x);
    nf = length (y0);
    jac = zeros (nf, nx);
    deltax = sqrt (eps);
    for i = 1:nx
      ## Perturb x(i), evaluate, then restore the saved value.
      t = x(i);
      x(i) += deltax;
      jac(:,i) = (feval (f, x) - y0) / deltax;
      x(i) = t;
    endfor
  else
    jac = zeros (0, nx);
  endif

### endfunction
|
566 |
|
567 |
|
function grd = fd_obj_grd (x)

  ## Finite-difference gradient of the objective function whose handle
  ## is stored in the global __sqp_obj_fun__.

  global __sqp_obj_fun__;

  objective = __sqp_obj_fun__;
  grd = fdgrd (objective, x);

### endfunction
|
575 |
|
576 |
|
function res = empty_cf (x)

  ## Placeholder constraint function: returns a 0x1 matrix regardless
  ## of x.

  n_rows = 0;
  n_cols = 1;
  res = zeros (n_rows, n_cols);

### endfunction
|
582 |
|
583 |
|
function res = empty_jac (x)

  ## Placeholder constraint Jacobian: returns a matrix with zero rows
  ## and length (x) columns.

  n_cols = length (x);
  res = zeros (0, n_cols);

### endfunction
|
589 |
|
590 |
|
function jac = fd_ce_jac (x)

  ## Finite-difference Jacobian of the equality constraint function
  ## whose handle is stored in the global __sqp_ce_fun__.

  global __sqp_ce_fun__;

  constraint = __sqp_ce_fun__;
  jac = fdjac (constraint, x);

### endfunction
|
598 |
|
599 |
|
function jac = fd_ci_jac (x)

  ## Finite-difference Jacobian of the inequality constraint function
  ## whose handle is stored in the global __sqp_ci_fun__.

  global __sqp_ci_fun__;

  constraint = __sqp_ci_fun__;
  jac = fdjac (constraint, x);

### endfunction