Mercurial > jwe > octave

--- a/doc/interpreter/linalg.txi	Fri Mar 09 13:44:08 2018 +0200
+++ b/doc/interpreter/linalg.txi	Sun Mar 11 07:29:04 2018 +0900
@@ -224,3 +224,5 @@
 @DOCSTRING(gmres)

 @DOCSTRING(qmr)
+
+@DOCSTRING(tfqmr)
--- a/scripts/sparse/bicg.m	Fri Mar 09 13:44:08 2018 +0200
+++ b/scripts/sparse/bicg.m	Sun Mar 11 07:29:04 2018 +0900
@@ -1,5 +1,6 @@
 ## Copyright (C) 2006 Sylvain Pelissier
 ## Copyright (C) 2012-2016 Carlo de Falco
+## Copyright (C) 2016 Cristiano Dorigo, Octave Arena
 ##
 ## This file is part of Octave.
 ##
@@ -18,35 +19,67 @@
 ## <https://www.gnu.org/licenses/>.

 ## -*- texinfo -*-
-## @deftypefn  {} {@var{x} =} bicg (@var{A}, @var{b}, @var{rtol}, @var{maxit}, @var{M1}, @var{M2}, @var{x0})
-## @deftypefnx {} {@var{x} =} bicg (@var{A}, @var{b}, @var{rtol}, @var{maxit}, @var{P})
+## @deftypefn  {} {@var{x} =} bicg (@var{A}, @var{b}, @var{tol}, @var{maxit}, @var{M1}, @var{M2}, @var{x0}, @dots{})
+## @deftypefnx {} {@var{x} =} bicg (@var{A}, @var{b}, @var{tol}, @var{maxit}, @var{M}, [], @var{x0}, @dots{})
 ## @deftypefnx {} {[@var{x}, @var{flag}, @var{relres}, @var{iter}, @var{resvec}] =} bicg (@var{A}, @var{b}, @dots{})
 ## Solve @code{A x = b} using the Bi-conjugate gradient iterative method.
 ##
+## The input parameters are:
+##
 ## @itemize @minus
-## @item @var{rtol} is the relative tolerance, if not given or set to [] the
-## default value 1e-6 is used.
+##
+## @item @var{A} it is a square matrix. @var{A} can be passed as a matrix or
+## as a function handle or inline function
+## @code{Afun} such that @code{Afun (x, "notransp") = A * x} and
+## @code{Afun (x, "transp") = A' * x}.  Additional parameters to
+## @code{Afun} are passed after @var{x0}.
+##
+## @item @var{b} is the right hand side vector. It must be a column vector
+## with same number of rows of @var{A}.
+##
+## @item
+## @var{tol} is the required relative tolerance for the residual error,
+## @w{@code{@var{b} - @var{A} * @var{x}}}.  The iteration stops if
+## @w{@code{norm (@var{b} - @var{A} * @var{x})} @leq{}
+## @w{@code{@var{tol} * norm (@var{b})}}}.
+## If @var{tol} is omitted or empty, then a tolerance of 1e-6 is used.
 ##
 ## @item @var{maxit} the maximum number of outer iterations, if not given or
 ## set to [] the default value @code{min (20, numel (b))} is used.
 ##
+## @item @var{M1}, @var{M2} are the preconditioners. The
+## preconditioner @var{M} is given as @code{@var{M} = @var{M1} * @var{M2}}.
+## Both @var{M1} and @var{M2} can be passed as a matrix or as a
+## function handle or inline
+## function @code{g} such that @code{g(@var{x}, "notransp") =
+## @var{M1} \ @var{x}} or
+## @code{g(@var{x}, "notransp") = @var{M2} \ @var{x}}
+## and @code{g(@var{x}, "transp") = @var{M1}' \ @var{x}} or
+## @code{g(@var{x}, "transp") = @var{M2}' \ @var{x}}.
+## If @var{M1} is empty or not passed, then preconditioning is not applied.
+## The preconditioned system is theoretically equivalent to apply the
+## @code{bicg} method to the linear systems
+## @code{inv (@var{M1}) * A * inv (@var{M2}) * @var{y} = inv
+## (@var{M1}) * @var{b}} and
+## @code{inv (@var{M2'}) * A' * inv (@var{M1'}) * @var{z} =
+## inv  (@var{M2'}) * @var{b}} and then set
+## @code{@var{x} = inv (@var{M2}) * @var{y}}.
+##
 ## @item @var{x0} the initial guess, if not given or set to [] the default
 ## value @code{zeros (size (b))} is used.
 ## @end itemize
 ##
-## @var{A} can be passed as a matrix or as a function handle or inline function
-## @code{f} such that @code{f(x, "notransp") = A*x} and
-## @code{f(x, "transp") = A'*x}.
+## The arguments which follow @var{x0} are treated as parameters, and passed in
+## a proper way to any of the functions (@var{A} or @var{M}) which are passed
+## to @code{bicg}.
+##
+## The output parameters are:
 ##
-## The preconditioner @var{P} is given as @code{P = M1 * M2}.  Both @var{M1}
-## and @var{M2} can be passed as a matrix or as a function handle or inline
-## function @code{g} such that @code{g(x, "notransp") = M1 \ x} or
-## @code{g(x, "notransp") = M2 \ x} and @code{g(x, "transp") = M1' \ x} or
-## @code{g(x, "transp") = M2' \ x}.
+## @itemize
 ##
-## If called with more than one output parameter
+## @item @var{x} is the approximation computed. If the method doesn't
+## converge then it is the iterated with the minimum residual.
 ##
-## @itemize @minus
 ## @item @var{flag} indicates the exit status:
 ##
 ## @itemize @minus
@@ -54,184 +87,292 @@
 ##
 ## @item 1: the maximum number of iterations was reached before convergence
 ##
+## @item 2: the preconditioner matrix is singular
+##
 ## @item 3: the algorithm reached stagnation
+##
+## @item 4: the algorithm can't continue due to a division by zero
+## @end itemize
+##
+## @item @var{relres} is the relative residual obtained as
+## @code{(@var{A}*@var{x}-@var{b}) / @code{norm(@var{b})}}.
+##
+## @item @var{iter} is the iteration which @var{x} is computed.
+##
+## @item @var{resvec} is a vector containing the residual at each iteration.
+## Doing @code{length(@var{resvec}) - 1} is possible to see the total number
+## of iterations performed.
 ## @end itemize
 ##
-## (the value 2 is unused but skipped for compatibility).
+## Let us consider a trivial problem with a tridiagonal matrix
 ##
-## @item @var{relres} is the final value of the relative residual.
+## @example
+## @group
+## n = 20;
+## A = toeplitz (sparse ([1, 1], [1, 2], [2, 1] * n ^ 2, 1, n))  + ...
+##     toeplitz (sparse (1, 2, -1, 1, n) * n / 2, ...
+##     sparse (1, 2, 1, 1, n) * n / 2);
+## b = A * ones (n, 1);
+## restart = 5;
+## [M1, M2] = ilu (A); # in this tridiag case, it corresponds to lu (A)
+## M = M1 * M2;
+## Afun = @@(x, string) strcmp (string, "notransp") * (A * x) + ...
+##                      strcmp (string, "transp") * (A' * x);
+## Mfun = @@(x, string) strcmp (string, "notransp") * (M \ x) + ...
+##                      strcmp (string, "transp") * (M' \ x);
+## M1fun = @@(x, string) strcmp (string, "notransp") * (M1 \ x) + ...
+##                      strcmp (string, "transp") * (M1' \ x);
+## M2fun = @@(x, string) strcmp (string, "notransp") * (M2 \ x) + ...
+##                      strcmp (string, "transp") * (M2' \ x);;
+## @end group
+## @end example
 ##
-## @item @var{iter} is the number of iterations performed.
+## @sc{Example 1:} simplest usage of @code{bicg}
+##
+## @example
+## x = bicg (A, b, [], n)
+## @end example
+##
+## @sc{Example 2:} @code{bicg} with a function which computes
+## @code{@var{A} * @var{x}} and @code{@var{A'} * @var{x}}
+##
+## @example
+## x = bicg (Afun, b, [], n)
+## @end example
+##
+## @sc{Example 3:} @code{bicg} with a preconditioner matrix @var{M}
+##
+## @example
+## x = bicg (A, b, [], 1e-06, n, M)
+## @end example
+##
+## @sc{Example 4:} @code{bicg} with a function as preconditioner
 ##
-## @item @var{resvec} is a vector containing the relative residual at each
-## iteration.
-## @end itemize
+## @example
+## x = bicg (Afun, b, 1e-6, n, Mfun)
+## @end example
+##
+## @sc{Example 5:} @code{bicg} with preconditioner matrices @var{M1}
+## and @var{M2}
+##
+## @example
+## x = bicg (A, b, [], 1e-6, n, M1, M2)
+## @end example
+##
+## @sc{Example 6:} @code{bicg} with functions as preconditioners
+##
+## @example
+## x = bicg (Afun, b, 1e-6, n, M1fun, M2fun)
+## @end example
+##
+## @sc{Example 7:} @code {bicg} with as input a function requiring an argument
 ##
-## @seealso{bicgstab, cgs, gmres, pcg, qmr}
+## @example
+## @group
+## function y = Ap (A, x, string, z) # compute A^z * x or (A^z)' * x
+##    y = x;
+##    if (strcmp (string, "notransp"))
+##      for i = 1:z
+##        y = A * y;
+##      endfor
+##    elseif (strcmp (string, "transp"))
+##      for i = 1:z
+##        y = A' * y;
+##      endfor
+##    endif
+##  endfunction
+## Apfun = @@(x, string, p) Ap (A, x, string, p);
+## x = bicg (Apfun, b, [], [], [], [], [], 2);
+## @end group
+## @end example
+##
+## References:
+##
+## @enumerate
+##
+## @item @nospell{Y. Saad}, @cite{Iterative Methods for Sparse Linear
+## Systems}, Second edition, 2003, SIAM
+##
+## @end enumerate
+##
+## @seealso{bicgstab, cgs, gmres, pcg, qmr, tfqmr}
 ##
 ## @end deftypefn

-## Author: Sylvain Pelissier <sylvain.pelissier@gmail.com>
-## Author: Carlo de Falco
-
-function [x, flag, res1, k, resvec] = bicg (A, b, rtol, maxit, M1, M2, x0)
-
-  if (nargin >= 2 && isvector (full (b)))
+function [x_min, flag, relres, iter_min, resvec] = ...
+         bicg (A, b, tol = [], maxit = [], M1 = [], M2 = [], x0 = [], varargin)

-    if (ischar (A))
-      fun = str2func (A);
-      Ax  = @(x) feval (fun, x, "notransp");
-      Atx = @(x) feval (fun, x, "transp");
-    elseif (isnumeric (A) && issquare (A))
-      Ax  = @(x) A  * x;
-      Atx = @(x) A' * x;
-    elseif (isa (A, "function_handle"))
-      Ax  = @(x) feval (A, x, "notransp");
-      Atx = @(x) feval (A, x, "transp");
-    else
-      error ("bicg: A must be a square matrix or function");
-    endif
-
-    if (nargin < 3 || isempty (rtol))
-      rtol = 1e-6;
-    endif
-
-    if (nargin < 4 || isempty (maxit))
-      maxit = min (rows (b), 20);
-    else
-      maxit = fix (maxit);
-    endif
+  [Afun, M1fun, M2fun] =  __alltohandles__ (A, b, M1, M2, "bicg");

-    if (nargin < 5 || isempty (M1))
-      M1m1x = @(x, ignore) x;
-      M1tm1x = M1m1x;
-    elseif (ischar (M1))
-      fun = str2func (M1);
-      M1m1x  = @(x) feval (fun, x, "notransp");
-      M1tm1x = @(x) feval (fun, x, "transp");
-    elseif (isnumeric (M1) && ismatrix (M1))
-      M1m1x  = @(x) M1  \ x;
-      M1tm1x = @(x) M1' \ x;
-    elseif (isa (M1, "function_handle"))
-      M1m1x  = @(x) feval (M1, x, "notransp");
-      M1tm1x = @(x) feval (M1, x, "transp");
-    else
-      error ("bicg: preconditioner M1 must be a function or matrix");
-    endif
+  [tol, maxit, x0] = __default__input__ ({1e-06, min(rows(b), 20), ...
+                                          zeros(rows (b),1)}, tol, maxit, x0);

-    if (nargin < 6 || isempty (M2))
-      M2m1x = @(x, ignore) x;
-      M2tm1x = M2m1x;
-    elseif (ischar (M2))
-      fun = str2func (M2);
-      M2m1x  = @(x) feval (fun, x, "notransp");
-      M2tm1x = @(x) feval (fun, x, "transp");
-    elseif (isnumeric (M2) && ismatrix (M2))
-      M2m1x  = @(x) M2  \ x;
-      M2tm1x = @(x) M2' \ x;
-    elseif (isa (M2, "function_handle"))
-      M2m1x  = @(x) feval (M2, x, "notransp");
-      M2tm1x = @(x) feval (M2, x, "transp");
-    else
-      error ("bicg: preconditioner M2 must be a function or matrix");
-    endif
-
-    Pm1x  = @(x) M2m1x  (M1m1x (x));
-    Ptm1x = @(x) M1tm1x (M2tm1x (x));
-
-    if (nargin < 7 || isempty (x0))
-      x0 = zeros (size (b));
-    endif
-
-    y = x = x0;
+  if (columns (b) == 2)
+    c = b(:,2);
+    b = b(:,1);
+  else
     c = b;
-
-    r0 = b - Ax (x);
-    s0 = c - Atx (y);
-
-    d = Pm1x (r0);
-    f = Ptm1x (s0);
-
-    bnorm = norm (b);
-    res0  = Inf;
-
-    if (any (r0 != 0))
-
-      for k = 1:maxit
-
-        a  = (s0' * Pm1x (r0)) ./ (f' * Ax (d));
-
-        x += a * d;
-        y += conj (a) * f;
-
-        r1 = r0 - a * Ax (d);
-        s1 = s0 - conj (a) * Atx (f);
-
-        beta = (s1' * Pm1x (r1)) ./ (s0' * Pm1x (r0));
-
-        d = Pm1x (r1) + beta * d;
-        f = Ptm1x (s1) + conj (beta) * f;
-
-        r0 = r1;
-        s0 = s1;
+  endif
+  norm_b = norm (b, 2);

-        res1 = norm (b - Ax (x)) / bnorm;
-        if (res1 < rtol)
-          flag = 0;
-          if (nargout < 2)
-            printf ("bicg converged at iteration %i ", k);
-            printf ("to a solution with relative residual %e\n", res1);
-          endif
-          break;
-        endif
-
-        if (res0 <= res1)
-          flag = 3;
-          printf ("bicg stopped at iteration %i ", k);
-          printf ("without converging to the desired tolerance %e\n", rtol);
-          printf ("because the method stagnated.\n");
-          printf ("The iterate returned (number %i) ", k-1);
-          printf ("has relative residual %e\n", res0);
-          break
-        endif
-        res0 = res1;
-        if (nargout > 4)
-          resvec(k) = res0;
-        endif
-      endfor
-
-      if (k == maxit)
-        flag = 1;
-        printf ("bicg stopped at iteration %i ", maxit);
-        printf ("without converging to the desired tolerance %e\n", rtol);
-        printf ("because the maximum number of iterations was reached. ");
-        printf ("The iterate returned (number %i) has ", maxit);
-        printf ("relative residual %e\n", res1);
-      endif
-
-    else
-      flag = 0;
-      if (nargout < 2)
-        printf ("bicg converged after 0 interations\n");
-      endif
+  if (norm_b == 0) # the only (only iff det(A) == 0) solution is x = 0
+    if (nargout < 2)
+      printf("The right hand side vector is all zero so bicg \n")
+      printf ("returned an all zero solution without iterating.\n")
     endif
-
-  else
-    print_usage ();
+    x_min = zeros (numel (b), 1);
+    flag = 0;
+    relres = 0;
+    iter_min = 0;
+    resvec = 0;
+    return
   endif

-endfunction;
+  x = x_min = x_pr = x0;
+  iter = iter_min = 0;
+  flag = 1; # Default flag is "maximum number of iterations reached"
+  resvec = zeros (maxit + 1, 1);
+  r0 = b - feval (Afun, x, "notransp", varargin{:}); # Residual of the sytem
+  s0 = c - feval (Afun, x, "transp", varargin{:}); # Res. of the "dual system"
+  resvec (1) = norm (r0, 2);
+
+  try
+    warning("error", "Octave:singular-matrix", "local")
+    prec_r0 = feval (M1fun, r0, "notransp", varargin{:}); # r0 preconditioned
+    prec_s0 = s0;
+    prec_r0 = feval (M2fun, prec_r0, "notransp", varargin{:});
+    prec_s0 = feval (M2fun, prec_s0, "transp", varargin{:});
+    prec_s0 = feval (M1fun, prec_s0, "transp", varargin{:}); # s0 preconditioned
+    p = prec_r0; # Direction of the system
+    q = prec_s0; # Direction of the "dual system"
+  catch
+    flag = 2;
+  end_try_catch
+
+  while ((flag != 2) && (iter < maxit) && ...
+         (resvec (iter + 1) >= norm_b * tol))
+    v = feval (Afun, p, "notransp", varargin{:});
+    prod_qv = q' * v;
+    if (prod_qv == 0)
+      flag = 4;
+      break
+    endif
+    alpha = (s0' * prec_r0) / prod_qv;
+    x += alpha * p;
+    prod_rs = (s0' * prec_r0); # Product between r0 and s0
+    r0 -= alpha * v;
+    s0 -= conj (alpha) * feval (Afun, q, "transp", varargin{:});
+    prec_r0 = feval (M1fun, r0, "notransp", varargin{:});
+    prec_s0 = s0;
+    prec_r0 = feval (M2fun, prec_r0, "notransp", varargin{:});
+    prec_s0 = feval (M2fun, prec_s0, "transp", varargin{:});
+    prec_s0 = feval (M1fun, prec_s0, "transp", varargin{:});
+    iter += 1;
+    resvec (iter + 1) = norm (r0);
+    if (resvec (iter + 1) <= resvec (iter_min + 1))
+      x_min = x;
+      iter_min = iter;
+    endif
+    if (norm (x - x_pr) <= norm (x) * eps)
+      flag = 3;
+      break;
+    endif
+    if (prod_rs == 0)
+      flag = 4;
+      break;
+    endif
+    beta = (s0' * prec_r0) / prod_rs;
+    p = prec_r0 + beta*p;
+    q = prec_s0 + conj (beta) * q;
+  endwhile
+  resvec = resvec (1:iter+1,1);
+
+  if (flag == 2)
+    relres = 1;
+  else
+    relres = resvec (iter_min + 1) / norm_b;
+  endif

+  if ((flag == 1) && (relres <= tol))
+    flag = 0;
+  endif
+
+  if (nargout < 2)
+    switch (flag)
+      case {0}
+        printf ("bicg converged at iteration %i ", iter_min);
+        printf ("to a solution with relative residual %e\n", relres);
+      case {1}
+        printf ("bicg stopped at iteration %i ", iter);
+        printf ("without converging to the desired tolerance %e\n", tol);
+        printf ("because the maximum number of iterations was reached. ");
+        printf ("The iterate returned (number %i) has ", iter_min);
+        printf ("relative residual %e\n", relres);
+      case {2}
+        printf ("bicg stopped at iteration %i ", iter);
+        printf ("without converging to the desired tolerance %e\n", tol);
+        printf ("because the preconditioner matrix is singular.\n");
+        printf ("The iterate returned (number %i) ", iter_min);
+        printf ("has relative residual %e\n", relres);
+      case {3}
+        printf ("bicg stopped at iteration %i ", iter);
+        printf ("without converging to the desired tolerance %e\n", tol);
+        printf ("because the method stagnated.\n");
+        printf ("The iterate returned (number %i) ", iter_min);
+        printf ("has relative residual %e\n", relres);
+      case {4}
+        printf ("bicg stopped at iteration %i ", iter);
+        printf ("without converging to the desired tolerance %e\n", tol);
+        printf ("because the method can't continue.\n");
+        printf ("The iterate returned (number %i) ", iter_min);
+        printf ("has relative residual %e\n", relres);
+    endswitch
+  endif
+
+endfunction
+
+%!test
+%! ## Check that all type of inputs work
+%! A = toeplitz (sparse ([2, 1, 0, 0, 0]), sparse ([2, -1, 0, 0, 0]));
+%! b = A * ones (5, 1);
+%! M1 = diag (sqrt (diag (A)));
+%! M2 = M1;
+%! Afun = @(z, string) strcmp (string, "notransp") * (A * z) + ...
+%!                     strcmp (string, "transp") * (A' * z);
+%! M1_fun = @(z, string) strcmp (string,"notransp") * (M1 \ z) + ...
+%!                         strcmp (string, "transp") * (M1' \ z);
+%! M2_fun = @(z, string) strcmp (string, "notransp") * (M2 \ z) + ...
+%!                         strcmp (string, "transp") * (M2' \ z);
+%! [x, flag] = bicg (A, b);
+%! assert (flag, 0);
+%! [x, flag] = bicg (A, b, [], [], M1, M2);
+%! assert (flag, 0);
+%! [x, flag] = bicg (A, b, [], [], M1_fun, M2_fun);
+%! assert (flag, 0);
+%! [x, flag] = bicg (A, b,[], [], M1_fun, M2);
+%! assert (flag, 0);
+%! [x, flag] = bicg (A, b,[], [], M1, M2_fun);
+%! assert (flag, 0);
+%! [x, flag] = bicg (Afun, b);
+%! assert (flag, 0);
+%! [x, flag] = bicg (Afun, b,[], [], M1, M2);
+%! assert (flag, 0);
+%! [x, flag] = bicg (Afun, b,[], [], M1_fun, M2);
+%! assert (flag, 0);
+%! [x, flag] = bicg (Afun, b,[], [], M1, M2_fun);
+%! assert (flag, 0);
+%! [x, flag] = bicg (Afun, b,[], [], M1_fun, M2_fun);
+%! assert (flag, 0);

 %!test
 %! n = 100;
 %! A = spdiags ([-2*ones(n,1) 4*ones(n,1) -ones(n,1)], -1:1, n, n);
 %! b = sum (A, 2);
-%! rtol = 1e-8;
+%! tol = 1e-8;
 %! maxit = 15;
 %! M1 = spdiags ([ones(n,1)/(-2) ones(n,1)],-1:0, n, n);
 %! M2 = spdiags ([4*ones(n,1) -ones(n,1)], 0:1, n, n);
-%! [x, flag, relres, iter, resvec] = bicg (A, b, rtol, maxit, M1, M2);
+%! [x, flag, relres, iter, resvec] = bicg (A, b, tol, maxit, M1, M2);
 %! assert (x, ones (size (b)), 1e-7);

 %!function y = afun (x, t, a)
@@ -247,20 +388,129 @@
 %! n = 100;
 %! A = spdiags ([-2*ones(n,1) 4*ones(n,1) -ones(n,1)], -1:1, n, n);
 %! b = sum (A, 2);
-%! rtol = 1e-8;
+%! tol = 1e-8;
 %! maxit = 15;
 %! M1 = spdiags ([ones(n,1)/(-2) ones(n,1)],-1:0, n, n);
 %! M2 = spdiags ([4*ones(n,1) -ones(n,1)], 0:1, n, n);
 %!
 %! [x, flag, relres, iter, resvec] = bicg (@(x, t) afun (x, t, A),
-%!                                         b, rtol, maxit, M1, M2);
+%!                                         b, tol, maxit, M1, M2);
 %! assert (x, ones (size (b)), 1e-7);

 %!test
 %! n = 100;
-%! rtol = 1e-8;
+%! tol = 1e-8;
 %! a = sprand (n, n, .1);
 %! A = a' * a + 100 * eye (n);
 %! b = sum (A, 2);
-%! [x, flag, relres, iter, resvec] = bicg (A, b, rtol, [], diag (diag (A)));
+%! [x, flag, relres, iter, resvec] = bicg (A, b, tol, [], diag (diag (A)));
 %! assert (x, ones (size (b)), 1e-7);
+
+%!test
+%! ## Check that if the preconditioner is singular, the method doesn't work
+%! A = toeplitz (sparse ([2, 1, 0, 0, 0]), sparse ([2, -1, 0, 0, 0]));
+%! b = ones(5,1);
+%! M = ones (5);
+%! [x, flag] = bicg (A, b, [], [], M);
+%! assert (flag, 2)
+
+%!test
+%! ## If A singular, the algorithm doesn't work due to division by zero
+%! A = ones (5);
+%! b = [1:5]';
+%! [x, flag] = bicg (A, b);
+%! assert (flag, 4)
+
+%!test
+%! ## test for a complex linear system
+%! A = toeplitz (sparse ([2, 1, 0, 0, 0]), sparse ([2, -1, 0, 0, 0])) + ...
+%! 1i * toeplitz (sparse ([2, 1, 0, 0, 0]), sparse ([2, -1, 0, 0, 0]));
+%! b = sum (A, 2);
+%! [x, flag] = bicg (A, b);
+%! assert (flag, 0)
+
+%!test
+%! A = single (1);
+%! b = 1;
+%! [x, flag] = bicg (A, b);
+%! assert (class (x), "single")
+
+%!test
+%! A = 1;
+%! b = single (1);
+%! [x, flag] = bicg (A, b);
+%! assert (class (x), "single")
+
+%!test
+%! A = single (1);
+%! b = single (1);
+%! [x, flag] = bicg (A, b);
+%! assert (class (x), "single")
+
+%!test
+%!function y = Afun (x, trans)
+%!   A = toeplitz (sparse ([2, 1, 0, 0]), sparse ([2, -1, 0, 0]) );
+%!   if (strcmp (trans, "notransp"))
+%!      y = A * x;
+%!   else
+%!      y = A' * x;
+%!   endif
+%!endfunction
+%! [x, flag] = bicg ("Afun", [1; 2; 2; 3]);
+%! assert (x, ones(4, 1), 1e-6)
+
+%!test # unpreconditioned residual
+%! A = toeplitz (sparse ([2, 1, 0, 0, 0]), sparse ([2, -1, 0, 0, 0]));
+%! b = sum (A, 2);
+%! M = magic (5);
+%! [x, flag, relres] = bicg (A, b, [], 2, M);
+%! assert (relres, norm (b - A * x) / norm (b), 8 * eps)
+
+%!demo # simplest use
+%! n = 20;
+%! A = toeplitz (sparse ([1, 1], [1, 2], [2, 1] * n ^ 2, 1, n))  + ...
+%!     toeplitz (sparse (1, 2, -1, 1, n) * n / 2, ...
+%!     sparse (1, 2, 1, 1, n) * n / 2);
+%! b = A * ones (n, 1);
+%! [M1, M2] = ilu (A + 0.1 * eye (n));
+%! M = M1 * M2;
+%! x = bicg (A, b, [], n);
+%! function y = Ap (A, x, string, z) # compute A^z * x or (A^z)' * x
+%!   y = x;
+%!   if (strcmp (string, "notransp"))
+%!     for i = 1:z
+%!       y = A * y;
+%!   endfor
+%!   elseif (strcmp (string, "transp"))
+%!     for i = 1:z
+%!       y = A' * y;
+%!     endfor
+%!   endif
+%! endfunction
+%! Afun = @(x, string) Ap (A, x, string, 1);
+%! x = bicg (Afun, b, [], n);
+%! x = bicg (A, b, 1e-6, n, M);
+%! x = bicg (A, b, 1e-6, n, M1, M2);
+%! function y = Mfun(M, x, string)
+%! if (strcmp (string, "notransp"))
+%!   y = M \ x;
+%! else
+%!   y = M' \ x;
+%! endif
+%! endfunction
+%! M1fun = @(x, string) Mfun (M, x, string);
+%! x = bicg (Afun, b, 1e-6, n, M1fun);
+%! M1fun = @(x, string) Mfun (M1, x, string);
+%! M2fun = @(x, string) Mfun (M2, x, string);
+%! x = bicg (Afun, b, 1e-6, n, M1fun, M2fun);
+%! Afun = @(x, string, p) Ap (A, x, string, p);
+%! x = bicg (Afun, b, [], 2*n, [], [], [], 2); # solution of A^2 * x = b
+
+%!test # preconditioned technique
+%! A = toeplitz (sparse ([2, 1, 0, 0, 0]), sparse ([2, -1, 0, 0, 0]));
+%! b = sum (A, 2);
+%! [M1, M2] = lu (A + eye (5));
+%! [x, flag] = bicg (A, b, [], 1, M1, M2);
+%! ## b has two columns!
+%! [y, flag]  = bicg (M1 \ A / M2, [M1 \ b, M2' \ b], [], 1);
+%! assert (x, M2 \ y, 8 * eps)
--- a/scripts/sparse/bicgstab.m	Fri Mar 09 13:44:08 2018 +0200
+++ b/scripts/sparse/bicgstab.m	Sun Mar 11 07:29:04 2018 +0900
@@ -1,5 +1,6 @@
 ## Copyright (C) 2008-2017 Radek Salac
 ## Copyright (C) 2012 Carlo de Falco
+## Copyright (C) 2016 Cristiano Dorigo, Octave Arena
 ##
 ## This file is part of Octave.
 ##
@@ -18,33 +19,60 @@
 ## <https://www.gnu.org/licenses/>.

 ## -*- texinfo -*-
-## @deftypefn  {} {@var{x} =} bicgstab (@var{A}, @var{b}, @var{rtol}, @var{maxit}, @var{M1}, @var{M2}, @var{x0})
-## @deftypefnx {} {@var{x} =} bicgstab (@var{A}, @var{b}, @var{rtol}, @var{maxit}, @var{P})
+## @deftypefn  {} {@var{x} =} bicgstab (@var{A}, @var{b}, @var{tol}, @var{maxit}, @var{M1}, @var{M2}, @var{x0}, @dots{})
+## @deftypefnx {} {@var{x} =} bicgstab (@var{A}, @var{b}, @var{tol}, @var{maxit}, @var{M}, [], @var{x0}, @dots{})
 ## @deftypefnx {} {[@var{x}, @var{flag}, @var{relres}, @var{iter}, @var{resvec}] =} bicgstab (@var{A}, @var{b}, @dots{})
 ## Solve @code{A x = b} using the stabilizied Bi-conjugate gradient iterative
 ## method.
 ##
+## The input parameters are:
+##
 ## @itemize @minus
-## @item @var{rtol} is the relative tolerance, if not given or set to [] the
-## default value 1e-6 is used.
+##
+## @item @var{A} is the matrix of the linear system and it must be square.
+## @var{A} can be passed as a matrix, function handle, or inline
+## function @code{Afun} such that @code{Afun(x) = A * x}. Additional
+## parameters to @code{Afun} are passed after @var{x0}.
+##
+## @item @var{b} is the right hand side vector. It must be a column vector
+## with the same number of rows as @var{A}.
+##
+## @item
+## @var{tol} is the required relative tolerance for the residual error,
+## @w{@code{@var{b} - @var{A} * @var{x}}}.  The iteration stops if
+## @w{@code{norm (@var{b} - @var{A} * @var{x})} @leq{}
+## @w{@code{@var{tol} * norm (@var{b})}}}.
+## If @var{tol} is omitted or empty, then a tolerance of 1e-6 is used.
 ##
 ## @item @var{maxit} the maximum number of outer iterations, if not given or
 ## set to [] the default value @code{min (20, numel (b))} is used.
 ##
+## @item @var{M1}, @var{M2} are the preconditioners. The preconditioner
+## @var{M} is given as @code{@var{M} = @var{M1} * @var{M2}}.
+## Both @var{M1} and @var{M2} can be passed as a matrix or as a function
+## handle or inline function @code{g} such that
+## @code{g(@var{x}) = @var{M1} \ @var{x}} or
+## @code {g(@var{x}) = @var{M2} \ @var{x}}.
+## The techinque used is the right preconditioning, i.e. it is
+## solved @code{@var{A} * inv (@var{M}) * @var{y} = @var{b}} and then
+## @code{@var{x} = inv (@var{M}) * @var{y}}.
+##
 ## @item @var{x0} the initial guess, if not given or set to [] the default
-## value @code{zeros (size (b))} is used.
+## value @code{zeros (size (@var{b}))} is used.
+##
 ## @end itemize
 ##
-## @var{A} can be passed as a matrix or as a function handle or inline
-## function @code{f} such that @code{f(x) = A*x}.
+## The arguments which follow @var{x0} are treated as parameters, and passed in
+## a proper way to any of the functions (@var{A} or @var{M}) which are passed
+## to @code{bicstab}.
 ##
-## The preconditioner @var{P} is given as @code{P = M1 * M2}.  Both @var{M1}
-## and @var{M2} can be passed as a matrix or as a function handle or inline
-## function @code{g} such that @code{g(x) = M1 \ x} or @code{g(x) = M2 \ x}.
-##
-## If called with more than one output parameter
+## The output parameters are:
 ##
 ## @itemize @minus
+##
+## @item @var{x} is the approximation computed. If the method doesn't
+## converge then it is the iterated with the minimum residual.
+##
 ## @item @var{flag} indicates the exit status:
 ##
 ## @itemize @minus
@@ -52,161 +80,307 @@
 ##
 ## @item 1: the maximum number of iterations was reached before convergence
 ##
+## @item 2: the preconditioner matrix is singular
+##
 ## @item 3: the algorithm reached stagnation
+##
+## @item 4: the algorithm can't continue due to a division by zero
+## @end itemize
+##
+## @item @var{relres} is the relative residual obtained with as
+## @code{(@var{A}*@var{x}-@var{b}) / @code{norm(@var{b})}}.
+##
+## @item @var{iter} is the (possibily half) iteration which @var{x} is
+## computed. If it is an half iteration then it is @code{@var{iter} + 0.5}
+##
+## @item @var{resvec} is a vector containing the residual of each half and
+## total iteration (There are also the half iterations since @var{x} is
+## computed in two steps at each iteration).
+## Doing @code{(length(@var{resvec}) - 1) / 2} is possible to see the
+## total number of (total) iterations performed.
+##
 ## @end itemize
 ##
-## (the value 2 is unused but skipped for compatibility).
+## Let us consider a trivial problem with a tridiagonal matrix
+##
+## @example
+## @group
+## n = 20;
+## A = toeplitz (sparse ([1, 1], [1, 2], [2, 1] * n ^ 2, 1, n))  + ...
+##     toeplitz (sparse (1, 2, -1, 1, n) * n / 2, ...
+##     sparse (1, 2, 1, 1, n) * n / 2);
+## b = A * ones (n, 1);
+## restart = 5;
+## [M1, M2] = ilu (A); # in this tridiag case, it corresponds to lu (A)
+## M = M1 * M2;
+## Afun = @@(x) A * x;
+## Mfun = @@(x) M \ x;
+## M1fun = @@(x) M1 \ x;
+## M2fun = @@(x) M2 \ x;
+## @end group
+## @end example
+##
+## @sc{Example 1:} simplest usage of @code{bicgstab}
+##
+## @example
+## x = bicgstab (A, b, [], n)
+## @end example
 ##
-## @item @var{relres} is the final value of the relative residual.
+## @sc{Example 2:} @code{bicgstab} with a function which computes
+## @code{@var{A} * @var{x}}
+##
+## @example
+## x = bicgstab (Afun, b, [], n)
+## @end example
+##
+## @sc{Example 3:} @code{bicgstab} with a preconditioner matrix @var{M}
+##
+## @example
+## x = bicgstab (A, b, [], 1e-06, n, M)
+## @end example
 ##
-## @item @var{iter} is the number of iterations performed.
+## @sc{Example 4:} @code{bicgstab} with a function as preconditioner
+##
+## @example
+## x = bicgstab (Afun, b, 1e-6, n, Mfun)
+## @end example
+##
+## @sc{Example 5:} @code{bicgstab} with preconditioner matrices @var{M1}
+## and @var{M2}
+##
+## @example
+## x = bicgstab (A, b, [], 1e-6, n, M1, M2)
+## @end example
 ##
-## @item @var{resvec} is a vector containing the relative residual at each
-## iteration.
-## @end itemize
+## @sc{Example 6:} @code{bicgstab} with functions as preconditioners
+##
+## @example
+## x = bicgstab (Afun, b, 1e-6, n, M1fun, M2fun)
+## @end example
+##
+## @sc{Example 7:} @code {bicgstab} with as input a function requiring
+## an argument
+##
+## @example
+## @group
+## function y = Ap (A, x, z) # compute A^z * x
+##    y = x;
+##    for i = 1:z
+##      y = A * y;
+##    endfor
+##  endfunction
+## Apfun = @@(x, string, p) Ap (A, x, string, p);
+## x = bicgstab (Apfun, b, [], [], [], [], [], 2);
+## @end group
+## @end example
+##
+## @sc{Example 8:} explicit example to show that @code{bicgstab} uses a
+## right preconditioner
 ##
-## @seealso{bicg, cgs, gmres, pcg, qmr}
+## @example
+## @group
+## [M1, M2] = ilu (A + 0.1 * eye (n)); # factorization of A perturbed
+## M = M1 * M2;
+##
+## ## reference solution computed by bicgstab after one iteration
+## [x_ref, fl] = bicgstab (A, b, [], 1, M)
+##
+## ## rigth preconditioning
+## [y, fl] = bicgstab (A / M, b, [], 1)
+## x = M \ y # compare x and x_ref
+##
+## @end group
+## @end example
+##
+## References:
+##
+## @enumerate
+##
+## @item @nospell{Y. Saad}, @cite{Iterative Methods for Sparse Linear
+## Systems}, Second edition, 2003, SIAM
+##
+## @end enumerate
+##
+## @seealso{bicg, cgs, gmres, pcg, qmr, tfqmr}
 ##
 ## @end deftypefn

-function [x, flag, relres, iter, resvec] = bicgstab (A, b, rtol, maxit,
-                                                     M1, M2, x0)
-
-  if (nargin < 2 || nargin > 7 || ! isvector (full (b)))
-    print_usage ();
-  endif
+function [x_min, flag, relres, iter_min, resvec] = ...
+         bicgstab (A, b, tol = [], maxit = [], M1 = [], M2 = [], ...
+                   x0 = [], varargin)

-  if (ischar (A))
-    Ax = str2func (A);
-  elseif (isnumeric(A) && issquare (A))
-    Ax = @(x) A  * x;
-  elseif (isa (A, "function_handle"))
-    Ax = @(x) feval (A, x);
-  else
-    error ("bicgstab: A must be a square matrix or function");
-  endif
+  ## Check consistency and  type of A, M1, M2
+  [Afun, M1fun, M2fun] =  __alltohandles__ (A, b, M1, M2, "bicgstab");

-  if (nargin < 3 || isempty (rtol))
-    rtol = 1e-6;
-  endif
-
-  if (nargin < 4 || isempty (maxit))
-    maxit = min (rows (b), 20);
-  endif
+  # Check if input tol are empty (set them to default if necessary)
+  [tol, maxit, x0] = __default__input__ ({1e-06, min(rows(b), 20), ...
+                    zeros(rows(b), 1)}, tol, maxit, x0);

-  if (nargin < 5 || isempty (M1))
-    M1m1x = @(x) x;
-  elseif (ischar (M1))
-    M1m1x = str2func (M1);
-  elseif (isnumeric(M1) && ismatrix (M1))
-    M1m1x = @(x) M1  \ x;
-  elseif (isa (M1, "function_handle"))
-    M1m1x = @(x) feval (M1, x);
-  else
-    error ("bicgstab: preconditioner M1 must be a function or matrix");
-  endif
-
-  if (nargin < 6 || isempty (M2))
-    M2m1x = @(x) x;
-  elseif (ischar (M2))
-    M2m1x = str2func (M2);
-  elseif (isnumeric(M2) && ismatrix (M2))
-    M2m1x = @(x) M2  \ x;
-  elseif (isa (M2, "function_handle"))
-    M2m1x = @(x) feval (M2, x);
-  else
-    error ("bicgstab: preconditioner M2 must be a function or matrix");
-  endif
-
-  precon = @(x) M2m1x (M1m1x (x));
-
-  if (nargin < 7 || isempty (x0))
-    x0 = zeros (size (b));
-  endif
-
-  ## specifies initial estimate x0
-  if (nargin < 7)
-    x = zeros (rows (b), 1);
-  else
-    x = x0;
+  norm_b = norm (b, 2);
+  if (norm_b == 0)
+    if (nargout < 2)
+      printf("The right hand side vector is all zero so bicgstab \n")
+      printf ("returned an all zero solution without iterating.\n")
+    endif
+    x_min = zeros (numel (b), 1);
+    iter_ min = 0;
+    flag = 0;
+    resvec = 0;
+    relres = 0;
+    return
   endif

-  norm_b = norm (b);
-
-  res = b - Ax (x);
-  rr = res;
-
-  ## Vector of the residual norms for each iteration.
-  resvec = norm (res) / norm_b;
-
-  ## Default behavior we don't reach tolerance rtol within maxit iterations.
+  ## Double maxit to mind also the "half iterations"
+  d_maxit = 2 * maxit;
+  iter = iter_min = 0;
+  resvec = zeros (d_maxit,1);
+  x = x_min = x_pr = x0;
+  iter = iter_min = 0;
+  ## deafult setting of flag is 1 (i.e. max number of iterations reached)
   flag = 1;

-  for iter = 1:maxit
-    rho_1 = rr' * res;
+  res = b - feval (Afun, x, varargin{:});
+  rr = p = res; # rr is r_star
+  rho_1 = rr' * res;
+  resvec (1) = norm (res,2);
+  real_tol = norm_b * tol;

-    if (iter == 1)
-      p = res;
-    else
-      beta = (rho_1 / rho_2) * (alpha / omega);
-      p = res + beta * (p - omega * v);
-    endif
-
-    phat = precon (p);
-
-    v = Ax (phat);
-    alpha = rho_1 / (rr' * v);
-    s = res - alpha * v;
+  ## To check if the preconditioners are singular or they have some NaN
+  try
+    warning("error", "Octave:singular-matrix", "local");
+    p_hat = feval (M1fun, p, varargin{:});
+    p_hat = feval (M2fun, p_hat, varargin{:});
+  catch
+    flag = 2;
+  end_try_catch

-    shat = precon (s);
-
-    t = Ax (shat);
-    omega = (s' * t) / (t' * t);
-    x += alpha * phat + omega * shat;
-    res = s - omega * t;
-    rho_2 = rho_1;
-
-    relres = norm (res) / norm_b;
-    resvec = [resvec; relres];
-
-    if (relres <= rtol)
-      ## We reach tolerance rtol within maxit iterations.
-      flag = 0;
+  while (flag !=2) && (iter < d_maxit) && (resvec (iter + 1) >= real_tol)
+    v = feval (Afun, p_hat, varargin{:});
+    prod_tmp = (rr' * v);
+    if (prod_tmp == 0)
+      flag = 4;
       break;
-    elseif (resvec(end) == resvec(end - 1))
-      ## The method stagnates.
-      flag = 3;
+    endif
+    alpha = rho_1 / (prod_tmp);
+    x += alpha * p_hat;
+    s = res - alpha * v;
+    iter += 1;
+    resvec (iter+1) = norm (s,2);
+    if (resvec (iter + 1) <= real_tol) # reached the tol
+      x_min = x;
+      iter_min = iter;
+      break
+    elseif (resvec (iter + 1) <= resvec (iter_min + 1)) # Found min residual
+      x_min = x;
+      iter_min = iter;
+    endif
+    s_hat = feval (M1fun, s, varargin{:});
+    s_hat = feval (M2fun, s_hat, varargin{:});
+    t = feval (Afun, s_hat, varargin{:});
+    omega = (t' * s) / (t' * t);
+    if (omega == 0) # x and residual don't change and the next it will be NaN
+      flag = 4;
       break;
     endif
-  endfor
+    x += omega * s_hat;
+    res = s - omega * t;
+    iter += 1;
+    resvec (iter + 1) = norm (res);
+    if (resvec (iter + 1) <= resvec (iter_min + 1))
+      x_min = x;
+      iter_min = iter;
+    endif
+    if (norm (x - x_pr) <= norm (x) * eps)
+      flag = 3;
+      break
+    endif
+    x_pr = x;
+    rho_2 = rho_1;
+    rho_1 = rr' * res;
+    if (rho_1 == 0) # x and residual don't change and the next it will be NaN
+      flag = 4;
+      break;
+    endif
+    beta = (rho_1 / rho_2) * (alpha / omega);
+    p = res + beta * (p - omega*  v);
+    p_hat = feval (M1fun, p, varargin{:});
+    p_hat = feval (M2fun, p_hat, varargin{:});
+  endwhile
+  resvec = resvec (1:iter+1,1);

+  relres = resvec (iter_min + 1) / norm_b; ## I set the relative residual
+  iter /=  2;
+  iter_min /= 2;
+
+  if (flag == 1) && (relres <= tol)
+    flag = 0;
+  endif
+
+  ## output strings to print when the outputs requested are less than 2
   if (nargout < 2)
-    if (flag == 0)
-      printf ("bicgstab converged at iteration %i ", iter);
-      printf ("to a solution with relative residual %e\n", relres);
-    elseif (flag == 3)
-      printf ("bicgstab stopped at iteration %i ", iter);
-      printf ("without converging to the desired tolerance %e\n", rtol);
-      printf ("because the method stagnated.\n");
-      printf ("The iterate returned (number %i) ", iter);
-      printf ("has relative residual %e\n", relres);
-    else
-      printf ("bicgstab stopped at iteration %i ", iter);
-      printf ("without converging to the desired toleranc %e\n", rtol);
-      printf ("because the maximum number of iterations was reached.\n");
-      printf ("The iterate returned (number %i) ", iter);
-      printf ("has relative residual %e\n", relres);
-    endif
+    switch (flag)
+      case {0}
+        printf ("bicgstab converged at iteration %i ", iter_min);
+        printf ("to a solution with relative residual %e\n", relres);
+      case {1}
+        printf ("bicgstab stopped at iteration %i ", iter);
+        printf ("without converging to the desired tolerance %e\n", tol);
+        printf ("because the maximum number of iterations was reached.\n");
+        printf ("The iterate returned (number %i) ", iter_min);
+        printf ("has relative residual %e\n", relres);
+      case {2}
+        printf ("bicgstab stopped at iteration %i ", iter);
+        printf ("without converging to the desired tolerance %e\n", tol);
+        printf ("because the preconditioner matrix is singular.\n");
+        printf ("The iterate returned (number %i) ", iter_min);
+        printf ("has relative residual %e\n", relres);
+      case {3}
+        printf ("bicgstab stopped at iteration %i ", iter);
+        printf ("without converging to the desired tolerance %e\n", tol);
+        printf ("because the method stagnated.\n");
+        printf ("The iterate returned (number %i) ", iter_min);
+        printf ("has relative residual %e\n", relres);
+      case {4}
+        printf ("bicgstab stopped at iteration %i ", iter);
+        printf ("without converging to the desired tolerance %e\n", tol);
+        printf ("because the method can't continue.\n");
+        printf ("The iterate returned (number %i) ", iter_min);
+        printf ("has relative residual %e\n", relres);
+    endswitch
   endif

 endfunction

-
-%!demo
-%! % Solve system of A*x=b
-%! A = [5 -1 3;-1 2 -2;3 -2 3];
-%! b = [7;-1;4];
-%! [x, flag, relres, iter, resvec] = bicgstab (A, b)
+%!test
+%! ## Check that all type of inputs work
+%! A = toeplitz (sparse ([2, 1, 0, 0, 0]), sparse ([2, -1, 0, 0, 0]));
+%! b = sum (A, 2);
+%! M1 = diag (sqrt (diag (A)));
+%! M2 = M1;
+%! maxit = 20;
+%! Afun = @(z) A*z;
+%! M1_fun = @(z) M1 \ z;
+%! M2_fun = @(z) M2 \ z;
+%! [x, flag] = bicgstab (A,b );
+%! assert (flag, 0);
+%! [x, flag] = bicgstab (A, b, [], maxit, M1, M2);
+%! assert (flag, 0);
+%! [x, flag] = bicgstab (A, b, [], maxit, M1_fun, M2_fun);
+%! assert (flag, 0);
+%! [x, flag] = bicgstab (A, b, [], maxit, M1_fun, M2);
+%! assert (flag, 0);
+%! [x, flag] = bicgstab (A, b, [], maxit, M1, M2_fun);
+%! assert (flag, 0);
+%! [x, flag] = bicgstab (Afun, b);
+%! assert (flag, 0);
+%! [x, flag] = bicgstab (Afun, b, [], maxit, M1, M2);
+%! assert (flag, 0);
+%! [x, flag] = bicgstab (Afun, b, [], maxit, M1_fun, M2);
+%! assert (flag, 0);
+%! [x, flag] = bicgstab (Afun, b, [], maxit, M1, M2_fun);
+%! assert (flag, 0);
+%! [x, flag] = bicgstab (Afun, b, [], maxit, M1_fun, M2_fun);
+%! assert (flag, 0);

 %!shared A, b, n, M1, M2
 %!
@@ -214,11 +388,11 @@
 %! n = 100;
 %! A = spdiags ([-2*ones(n,1) 4*ones(n,1) -ones(n,1)], -1:1, n, n);
 %! b = sum (A, 2);
-%! rtol = 1e-8;
+%! tol = 1e-8;
 %! maxit = 15;
 %! M1 = spdiags ([ones(n,1)/(-2) ones(n,1)],-1:0, n, n);
 %! M2 = spdiags ([4*ones(n,1) -ones(n,1)], 0:1, n, n);
-%! [x, flag, relres, iter, resvec] = bicgstab (A, b, rtol, maxit, M1, M2);
+%! [x, flag, relres, iter, resvec] = bicgstab (A, b, tol, maxit, M1, M2);
 %! assert (x, ones (size (b)), 1e-7);
 %!
 %!test
@@ -226,24 +400,116 @@
 %!  y = a * x;
 %!endfunction
 %!
-%! rtol = 1e-8;
+%! tol = 1e-8;
 %! maxit = 15;
 %!
 %! [x, flag, relres, iter, resvec] = bicgstab (@(x) afun (x, A), b,
-%!                                             rtol, maxit, M1, M2);
+%!                                             tol, maxit, M1, M2);
 %! assert (x, ones (size (b)), 1e-7);

 %!test
 %! n = 100;
-%! rtol = 1e-8;
+%! tol = 1e-8;
 %! a = sprand (n, n, .1);
 %! A = a'*a + 100 * eye (n);
 %! b = sum (A, 2);
-%! [x, flag, relres, iter, resvec] = bicgstab (A, b, rtol, [], diag (diag (A)));
+%! [x, flag, relres, iter, resvec] = bicgstab (A, b, tol, [], diag (diag (A)));
 %! assert (x, ones (size (b)), 1e-7);

 %!test
+%! ## bicgstab solves complex linear systems
 %! A = [1 + 1i, 1 + 1i; 2 - 1i, 2 + 1i];
 %! b = A * [1; 1];
 %! [x, flag, relres, iter, resvec] = bicgstab (A, b);
 %! assert (x, [1; 1], 1e-6);
+
+%!test
+%! ## test with a non symmetric matrix
+%! A = diag(1:50);
+%! A (1,50) = 10000;
+%! b = ones (50,1);
+%! [x, flag, relres, iter, resvec] = bicgstab (A, b, [], 100);
+%! assert (flag, 0)
+%! assert (x, A \ b, 1e-05)
+%! ## test that bicgstab detects a singular preconditioner
+%! M = ones (50);
+%! M(1,1) = 0;
+%! [x, flag] = bicgstab (A, b, [], 100, M);
+%! assert(flag, 2)
+
+%!test
+%! A = single (1);
+%! b = 1;
+%! [x, flag] = bicgstab (A, b);
+%! assert (class (x), "single")
+
+%!test
+%! A = 1;
+%! b = single (1);
+%! [x, flag] = bicgstab (A, b);
+%! assert (class (x), "single")
+
+%!test
+%! A = single (1);
+%! b = single (1);
+%! [x, flag] = bicgstab (A, b);
+%! assert (class (x), "single")
+
+%!test
+%!function y = Afun (x)
+%!   A = toeplitz ([2, 1, 0, 0], [2, -1, 0, 0]);
+%!   y = A * x;
+%!endfunction
+%! [x, flag] = bicgstab ("Afun", [1; 2; 2; 3]);
+%! assert (x, ones(4, 1), 1e-6)
+
+%!test # unpreconditioned residual
+%! A = toeplitz (sparse ([2, 1, 0, 0, 0]), sparse ([2, -1, 0, 0, 0]));
+%! b = sum (A, 2);
+%! M = magic (5);
+%! [x, flag, relres] = bicgstab (A, b, [], 2, M);
+%! assert (relres, norm (b - A * x) / norm (b), 8 * eps)
+
+%!demo # simplest use
+%! n = 20;
+%! A = toeplitz (sparse ([1, 1], [1, 2], [2, 1] * n ^ 2, 1, n))  + ...
+%!     toeplitz (sparse (1, 2, -1, 1, n) * n / 2, ...
+%!     sparse (1, 2, 1, 1, n) * n / 2);
+%! b = A * ones (n, 1);
+%! [M1, M2] = ilu (A + 0.1 * eye (n));
+%! M = M1 * M2;
+%! x = bicgstab (A, b, [], n);
+%! Afun = @(x) A * x;
+%! x = bicgstab (Afun, b, [], n);
+%! x = bicgstab (A, b, 1e-6, n, M);
+%! x = bicgstab (A, b, 1e-6, n, M1, M2);
+%! Mfun = @(z) M \ z;
+%! x = bicgstab (Afun, b, 1e-6, n, Mfun);
+%! M1fun = @(z) M1 \ z;
+%! M2fun = @(z) M2 \ z;
+%! x = bicgstab (Afun, b, 1e-6, n, M1fun, M2fun);
+%! function y = Ap (A, x, z) # compute A^z * x or (A^z)' * x
+%!    y = x;
+%!    for i = 1:z
+%!      y = A * y;
+%!    endfor
+%!  endfunction
+%! Afun = @(x, p) Ap (A, x, p);
+%! x = bicgstab (Afun, b, [], 2 * n, [], [], [], 2); # solution of A^2 * x = b
+
+%!demo
+%! n = 10;
+%! A = toeplitz (sparse ([1, 1], [1, 2], [2, 1] * n ^ 2, 1, n))  + ...
+%!     toeplitz (sparse (1, 2, -1, 1, n) * n / 2, ...
+%!     sparse (1, 2, 1, 1, n) * n / 2);
+%! b = A * ones (n, 1);
+%! [M1, M2] = ilu (A + 0.3 * eye (n)); # factorization of A perturbed
+%! M = M1 * M2;
+%!
+%! ## reference solution computed by bicgstab after one iteration
+%! [x_ref, fl] = bicgstab (A, b, [], 1, M);
+%! x_ref
+%!
+%! ## right preconditioning
+%! [y, fl] = bicgstab (A / M, b, [], 1);
+%! x = M \ y # compare x and x_ref
--- a/scripts/sparse/cgs.m	Fri Mar 09 13:44:08 2018 +0200
+++ b/scripts/sparse/cgs.m	Sun Mar 11 07:29:04 2018 +0900
@@ -1,5 +1,6 @@
 ## Copyright (C) 2008-2017 Radek Salac
 ## Copyright (C) 2012 Carlo de Falco
+## Copyright (C) 2016 Cristiano Dorigo, Octave Arena
 ##
 ## This file is part of Octave.
 ##
@@ -18,33 +19,53 @@
 ## <https://www.gnu.org/licenses/>.

 ## -*- texinfo -*-
-## @deftypefn  {} {@var{x} =} cgs (@var{A}, @var{b}, @var{rtol}, @var{maxit}, @var{M1}, @var{M2}, @var{x0})
-## @deftypefnx {} {@var{x} =} cgs (@var{A}, @var{b}, @var{rtol}, @var{maxit}, @var{P})
+## @deftypefn  {} {@var{x} =} cgs (@var{A}, @var{b}, @var{tol}, @var{maxit}, @var{M1}, @var{M2}, @var{x0}, @dots{})
+## @deftypefnx {} {@var{x} =} cgs (@var{A}, @var{b}, @var{tol}, @var{maxit}, @var{M}, [], @var{x0}, @dots{})
 ## @deftypefnx {} {[@var{x}, @var{flag}, @var{relres}, @var{iter}, @var{resvec}] =} cgs (@var{A}, @var{b}, @dots{})
 ## Solve @code{A x = b}, where @var{A} is a square matrix, using the
 ## Conjugate Gradients Squared method.
 ##
+## The input arguments are:
+##
 ## @itemize @minus
-## @item @var{rtol} is the relative tolerance, if not given or set to [] the
+##
+## @item @var{A} is the matrix of the linear system and it must be square.
+## @var{A} can be passed as a matrix, function handle, or inline
+## function @code{Afun} such that @code{Afun(x) = A * x}. Additional
+## parameters to @code{Afun} are passed after @var{x0}.
+##
+## @item @var{b} is the right hand side vector. It must be a column vector
+## with same number of rows of @var{A}.
+##
+## @item @var{tol} is the relative tolerance, if not given or set to [] the
 ## default value 1e-6 is used.
 ##
 ## @item @var{maxit} the maximum number of outer iterations, if not given or
 ## set to [] the default value @code{min (20, numel (b))} is used.
 ##
+## @item @var{M1}, @var{M2} are the preconditioners. The preconditioner
+## matrix is given as @code{M = M1 * M2}.  Both @var{M1}
+## and @var{M2} can be passed as a matrix or as a function handle or inline
+## function @code{g} such that @code{g(x) = M1 \ x} or @code{g(x) = M2 \ x}.
+## If M1 is empty or not passed then no preconditioners are applied.
+## The techinque used is the right preconditioning, i.e. it is solved
+## @code{@var{A}*inv(@var{M})*y = b} and then @code{@var{x} = inv(@var{M})*y}.
+##
 ## @item @var{x0} the initial guess, if not given or set to [] the default
 ## value @code{zeros (size (b))} is used.
 ## @end itemize
 ##
-## @var{A} can be passed as a matrix or as a function handle or inline
-## function @code{f} such that @code{f(x) = A*x}.
+## The arguments which follow @var{x0} are treated as parameters, and passed in
+## a proper way to any of the functions (@var{A} or @var{P}) which are passed
+## to @code{cgs}.
 ##
-## The preconditioner @var{P} is given as @code{P = M1 * M2}.  Both @var{M1}
-## and @var{M2} can be passed as a matrix or as a function handle or inline
-## function @code{g} such that @code{g(x) = M1 \ x} or @code{g(x) = M2 \ x}.
-##
-## If called with more than one output parameter
+## The output parameters are:
 ##
 ## @itemize @minus
+##
+## @item @var{x} is the approximation computed. If the method doesn't
+## converge then it is the iterated with the minimum residual.
+##
 ## @item @var{flag} indicates the exit status:
 ##
 ## @itemize @minus
@@ -52,171 +73,399 @@
 ##
 ## @item 1: the maximum number of iterations was reached before convergence
 ##
+## @item 2: the preconditioner matrix is singular
+##
 ## @item 3: the algorithm reached stagnation
+##
+## @item 4: the algorithm can't continue due to a division by zero
+## @end itemize
+##
+## @item @var{relres} is the relative residual obtained with as
+## @code{(@var{A}*@var{x}-@var{b}) / @code{norm(@var{b})}}.
+##
+## @item @var{iter} is the iteration which @var{x} is computed.
+##
+## @item @var{resvec} is a vector containing the residual at each iteration.
+## Doing @code{length(@var{resvec}) - 1} is possible to see the total number
+## of iterations performed.
 ## @end itemize
 ##
-## (the value 2 is unused but skipped for compatibility).
+## Let us consider a trivial problem with a tridiagonal matrix
+##
+## @example
+## @group
+## n = 20;
+## A = toeplitz (sparse ([1, 1], [1, 2], [2, 1] * n ^ 2, 1, n))  + ...
+##     toeplitz (sparse (1, 2, -1, 1, n) * n / 2, ...
+##     sparse (1, 2, 1, 1, n) * n / 2);
+## b = A * ones (n, 1);
+## restart = 5;
+## [M1, M2] = ilu (A); # in this tridiag case it corresponds to chol (A)'
+## M = M1 * M2;
+## Afun = @@(x) A * x;
+## Mfun = @@(x) M \ x;
+## M1fun = @@(x) M1 \ x;
+## M2fun = @@(x) M2 \ x;
+## @end group
+## @end example
+##
+## @sc{Example 1:} simplest usage of @code{cgs}
 ##
-## @item @var{relres} is the final value of the relative residual.
+## @example
+## x = cgs (A, b, [], n)
+## @end example
+##
+## @sc{Example 2:} @code{cgs} with a function which computes
+## @code{@var{A} * @var{x}}
+##
+## @example
+## x = cgs (Afun, b, [], n)
+## @end example
+##
+## @sc{Example 3:} @code{cgs} with a preconditioner matrix @var{M}
 ##
-## @item @var{iter} is the number of iterations performed.
+## @example
+## x = cgs (A, b, [], 1e-06, n, M)
+## @end example
+##
+## @sc{Example 4:} @code{cgs} with a function as preconditioner
+##
+## @example
+## x = cgs (Afun, b, 1e-6, n, Mfun)
+## @end example
+##
+## @sc{Example 5:} @code{cgs} with preconditioner matrices @var{M1}
+## and @var{M2}
 ##
-## @item @var{resvec} is a vector containing the relative residual at
-## each iteration.
-## @end itemize
+## @example
+## x = cgs (A, b, [], 1e-6, n, M1, M2)
+## @end example
+##
+## @sc{Example 6:} @code{cgs} with functions as preconditioners
+##
+## @example
+## x = cgs (Afun, b, 1e-6, n, M1fun, M2fun)
+## @end example
+##
+## @sc{Example 7:} @code {cgs} with as input a function requiring an argument
+##
+## @example
+## @group
+## function y = Ap (A, x, z) # compute A^z * x
+##    y = x;
+##    for i = 1:z
+##      y = A * y;
+##    endfor
+##  endfunction
+## Apfun = @@(x, string, p) Ap (A, x, string, p);
+## x = cgs (Apfun, b, [], [], [], [], [], 2);
+## @end group
+## @end example
 ##
-## @seealso{pcg, bicgstab, bicg, gmres, qmr}
+## @sc{Example 8:} explicit example to show that @code{cgs} uses a
+## right preconditioner
+##
+## @example
+## @group
+## [M1, M2] = ilu (A + 0.3 * eye (n)); # factorization of A perturbed
+## M = M1 * M2;
+##
+## ## reference solution computed by cgs after one iteration
+## [x_ref, fl] = cgs (A, b, [], 1, M)
+##
+## ## rigth preconditioning
+## [y, fl] = cgs (A / M, b, [], 1)
+## x = M \ y # compare x and x_ref
+##
+## @end group
+## @end example
+##
+## References:
+##
+## @enumerate
+##
+## @item @nospell{Y. Saad}, @cite{Iterative Methods for Sparse Linear Systems},
+## Second edition, 2003, SIAM
+##
+## @end enumerate
+##
+## @seealso{pcg, bicgstab, bicg, gmres, qmr, tfqmr}
 ## @end deftypefn

-function [x, flag, relres, iter, resvec] = cgs (A, b, rtol, maxit, M1, M2, x0)
-
-  if (nargin >= 2 && nargin <= 7 && isvector (full (b)))
+function [x_min, flag, relres, iter_min, resvec] = ...
+         cgs (A, b, tol = [], maxit = [], M1 = [] , M2 = [], x0 = [], varargin)

-    if (ischar (A))
-      Ax = str2func (A);
-    elseif (isnumeric (A) && issquare (A))
-      Ax = @(x) A * x;
-    elseif (isa (A, "function_handle"))
-      Ax = @(x) feval (A, x);
-    else
-      error ("cgs: A must be a square matrix or function");
-    endif
+  [Afun, M1fun, M2fun] = __alltohandles__ (A, b, M1, M2, "cgs");

-    if (nargin < 3 || isempty (rtol))
-      rtol = 1e-6;
-    endif
-
-    if (nargin < 4 || isempty (maxit))
-      maxit = min (rows (b), 20);
-    endif
+  [tol, maxit, x0] = __default__input__ ({1e-06, min( rows(b), 20), ...
+                                          zeros(size(b))}, tol, maxit, x0);

-    if (nargin < 5 || isempty (M1))
-      M1m1x = @(x) x;
-    elseif (ischar (M1))
-      M1m1x = str2func (M1);
-    elseif (isnumeric (M1) && ismatrix (M1))
-      M1m1x = @(x) M1 \ x;
-    elseif (isa (M1, "function_handle"))
-      M1m1x = @(x) feval (M1, x);
-    else
-      error ("cgs: preconditioner M1 must be a function or matrix");
+  norm_b = norm (b, 2);
+  if (norm_b == 0)
+    if (nargout < 2)
+      printf("The right hand side vector is all zero so cgs \n")
+      printf ("returned an all zero solution without iterating.\n")
     endif
+    x_min = zeros (numel (b), 1);
+    iter_ min = 0;
+    flag = 0;
+    resvec = 0;
+    relres = 0;
+    return
+  endif
+
+  resvec = zeros (maxit, 1); # Preallocation of resvec

-    if (nargin < 6 || isempty (M2))
-      M2m1x = @(x) x;
-    elseif (ischar (M2))
-      M2m1x = str2func (M2);
-    elseif (isnumeric (M2) && ismatrix (M2))
-      M2m1x = @(x) M2 \ x;
-    elseif (isa (M2, "function_handle"))
-      M2m1x = @(x) feval (M2, x);
-    else
-      error ("cgs: preconditioner M2 must be a function or matrix");
-    endif
+  flag = 1; # Default flag is 1, i.e. maximum number of iterations reached
+  iter = iter_min = 0;
+  x = x_min = x_pr = x0;
+  ## x approximation at the actual iteration
+  ## x_min approximation with the minimum residual
+  ## x_pr approximation at the previous iteration (to check stagnation)

-    precon = @(x) M2m1x (M1m1x (x));
+  r0 = rr = u = p = b - feval (Afun, x, varargin{:});
+  resvec (1) = norm (r0, 2);
+  rho_1 = rr' * r0;

-    if (nargin < 7 || isempty (x0))
-      x0 = zeros (size (b));
-    endif
-
-
-    x = x0;
+  try
+    warning ("error","Octave:singular-matrix","local")
+    p_hat = feval (M1fun, p, varargin{:});
+    p_hat = feval (M2fun, p_hat, varargin {:});
+  catch
+    flag = 2;
+  end_try_catch

-    res = b - Ax (x);
-    norm_b = norm (b);
-    ## Vector of the residual norms for each iteration.
-    resvec = norm (res) / norm_b;
-    ro = 0;
-    ## Default behavior we don't reach tolerance rtol within maxit iterations.
-    flag = 1;
-    for iter = 1:maxit
-
-      z = precon (res);
-
-      ## Cache.
-      ro_old = ro;
-      ro = res' * z;
-      if (iter == 1)
-        p = z;
-      else
-        beta = ro / ro_old;
-        p = z + beta * p;
-      endif
-      ## Cache.
-      q = Ax (p);
-      alpha = ro / (p' * q);
-      x += alpha * p;
-
-      res -= alpha * q;
-      relres = norm (res) / norm_b;
-      resvec = [resvec; relres];
+  while ((flag != 2) && (iter < maxit) && ...
+         (resvec (iter + 1) >= tol * norm_b))
+    v = feval (Afun, p_hat, varargin{:});
+    prod_tmp = (rr' * v);
+    if (prod_tmp == 0)
+      flag = 4;
+      break;
+    endif
+    alpha = rho_1 / prod_tmp;
+    q = u - alpha * v;
+    u_hat = feval(M1fun, u + q, varargin{:});
+    u_hat = feval (M2fun, u_hat, varargin{:});
+    x += alpha*u_hat;
+    r0 -= alpha* feval (Afun, u_hat, varargin{:});
+    iter += 1;
+    resvec (iter + 1) = norm (r0, 2);
+    if (norm (x - x_pr, 2) <= norm(x, 2) * eps) # Stagnation
+      flag = 3;
+      break;
+    endif
+    if (resvec (iter + 1) <= resvec (iter_min + 1)) # Check min residual
+      x_min = x;
+      iter_min = iter;
+    endif
+    x_pr = x;
+    rho_2 = rho_1;
+    rho_1 = rr' * r0;
+    if (rho_1 == 0)
+      flag = 4;
+      break;
+    endif
+    beta = rho_1 / rho_2;
+    u = r0 + beta * q;
+    p = u + beta * (q + beta * p);
+    p_hat = feval (M1fun, p, varargin {:});
+    p_hat = feval (M2fun, p_hat, varargin{:});
+  endwhile
+  resvec = resvec (1: (iter + 1));

-      if (relres <= rtol)
-        ## We reach tolerance rtol within maxit iterations.
-        flag = 0;
-        break
-      elseif (resvec(end) == resvec(end - 1))
-        ## The method stagnates.
-        flag = 3;
-        break
-      endif
-    endfor
+  relres = resvec (iter_min + 1) / norm_b;
+  if (relres <= tol) && (flag = 1)
+    flag = 0;
+  endif

-    if (nargout < 1)
-      if (flag == 0)
-        printf ("cgs converged at iteration %i to a solution with relative residual %e\n",
-                iter, relres);
-      elseif (flag == 3)
-        printf (["cgs stopped at iteration %i without converging to the desired tolerance %e\n",
-                 "because the method stagnated.\n",
-                 "The iterate returned (number %i) has relative residual %e\n"],
-                iter, rtol, iter, relres);
-      else
-        printf (["cgs stopped at iteration %i without converging to the desired tolerance %e\n",
-                 "because the maximum number of iterations was reached.\n",
-                 "The iterate returned (number %i) has relative residual %e\n"],
-                iter, rtol, iter, relres);
-      endif
-    endif
-
-  else
-    print_usage ();
+  if (nargout < 2)
+    switch (flag)
+      case {0}
+        printf ("cgs converged at iteration %i ", iter_min);
+        printf ("to a solution with relative residual %e\n", relres);
+      case {1}
+        printf ("cgs stopped at iteration %i ", iter);
+        printf ("without converging to the desired tolerance %e\n", tol);
+        printf ("because the maximum number of iterations was reached.\n");
+        printf ("The iterate returned (number %i) ", iter_min);
+        printf ("has relative residual %e\n", relres);
+      case {2}
+        printf ("cgs stopped at iteration %i ", iter);
+        printf ("without converging to the desired tolerance %e\n", tol);
+        printf ("because the preconditioner matrix is singular.\n");
+        printf ("The iterate returned (number %i) ", iter_min);
+        printf ("has relative residual %e\n", relres);
+      case {3}
+        printf ("cgs stopped at iteration %i ", iter);
+        printf ("without converging to the desired tolerance %e\n", tol);
+        printf ("because the method stagnated.\n");
+        printf ("The iterate returned (number %i) ", iter_min);
+        printf ("has relative residual %e\n", relres);
+      case {4}
+        printf ("cgs stopped at iteration %i ", iter);
+        printf ("without converging to the desired tolerance %e\n", tol);
+        printf ("because the method can't continue.\n");
+        printf ("The iterate returned (number %i) ", iter_min);
+        printf ("has relative residual %e\n", relres);
+    endswitch
   endif

 endfunction

-
 %!demo
 %! % Solve system of A*x=b
 %! A = [5 -1 3;-1 2 -2;3 -2 3];
 %! b = [7;-1;4];
 %! [a,b,c,d,e] = cgs (A,b)

+%!demo # simplest use
+%! n = 20;
+%! A = toeplitz (sparse ([1, 1], [1, 2], [2, 1] * n ^ 2, 1, n))  + ...
+%!     toeplitz (sparse (1, 2, -1, 1, n) * n / 2, ...
+%!     sparse (1, 2, 1, 1, n) * n / 2);
+%! b = A * ones (n, 1);
+%! [M1, M2] = ilu (A + 0.1 * eye (n));
+%! M = M1 * M2;
+%! x = cgs (A, b, [], n);
+%! Afun = @(x) A * x;
+%! x = cgs (Afun, b, [], n);
+%! x = cgs (A, b, 1e-6, n, M);
+%! x = cgs (A, b, 1e-6, n, M1, M2);
+%! Mfun = @(z) M \ z;
+%! x = cgs (Afun, b, 1e-6, n, Mfun);
+%! M1fun = @(z) M1 \ z;
+%! M2fun = @(z) M2 \ z;
+%! x = cgs (Afun, b, 1e-6, n, M1fun, M2fun);
+%! function y = Ap (A, x, z) # compute A^z * x or (A^z)' * x
+%!    y = x;
+%!    for i = 1:z
+%!      y = A * y;
+%!    endfor
+%!  endfunction
+%! Afun = @(x, p) Ap (A, x, p);
+%! x = cgs (Afun, b, [], 2*n, [], [], [], 2); # solution of A^2 * x = b
+
+%!demo
+%! n = 10;
+%! A = toeplitz (sparse ([1, 1], [1, 2], [2, 1] * n ^ 2, 1, n))  + ...
+%!     toeplitz (sparse (1, 2, -1, 1, n) * n / 2, ...
+%!     sparse (1, 2, 1, 1, n) * n / 2);
+%! b = A * ones (n, 1);
+%! [M1, M2] = ilu (A + 0.3 * eye (n)); # factorization of A perturbed
+%! M = M1 * M2;
+%!
+%! ## reference solution computed by cgs after one iteration
+%! [x_ref, fl] = cgs (A, b, [], 1, M);
+%! x_ref
+%!
+%! ## right preconditioning
+%! [y, fl] = cgs (A / M, b, [], 1);
+%! x = M \ y # compare x and x_ref
+
+%!test
+%! ## Check that all type of inputs work
+%! A = toeplitz (sparse ([2, 1, 0, 0, 0]), sparse ([2, -1, 0, 0, 0]));
+%! b = sum (A, 2);
+%! M1 = diag (sqrt (diag (A)));
+%! M2 = M1;
+%! maxit = 10;
+%! Afun = @(z) A * z;
+%! M1_fun = @(z) M1 \ z;
+%! M2_fun = @(z) M2 \ z;
+%! [x, flag] = cgs (A,b);
+%! assert(flag, 0);
+%! [x, flag] = cgs (A, b, [], maxit, M1, M2);
+%! assert (flag, 0);
+%! [x, flag] = cgs (A, b, [], maxit, M1_fun, M2_fun);
+%! assert (flag, 0);
+%! [x, flag] = cgs (A, b, [], maxit, M1_fun, M2);
+%! assert (flag, 0);
+%! [x, flag] = cgs (A, b, [], maxit, M1, M2_fun);
+%! assert (flag, 0);
+%! [x, flag] = cgs (Afun, b);
+%! assert (flag, 0);
+%! [x, flag] = cgs (Afun, b, [], maxit, M1, M2);
+%! assert (flag, 0);
+%! [x, flag] = cgs (Afun, b, [], maxit, M1_fun, M2);
+%! assert (flag, 0);
+%! [x, flag] = cgs (Afun, b, [], maxit, M1, M2_fun);
+%! assert (flag, 0);
+%! [x, flag] = cgs (Afun, b, [], maxit, M1_fun, M2_fun);
+%! assert (flag, 0);
+
 %!shared A, b, n, M
 %!
 %!test
 %! n = 100;
 %! A = spdiags ([-ones(n,1) 4*ones(n,1) -ones(n,1)], -1:1, n, n);
 %! b = sum (A, 2);
-%! rtol = 1e-8;
+%! tol = 1e-8;
 %! maxit = 1000;
-%! M = 4*eye (n);
-%! [x, flag, relres, iter, resvec] = cgs (A, b, rtol, maxit, M);
+%! M = 4 * eye (n);
+%! [x, flag, relres, iter, resvec] = cgs (A, b, tol, maxit, M);
 %! assert (x, ones (size (b)), 1e-7);
 %!
 %!test
-%! rtol = 1e-8;
+%! tol = 1e-8;
 %! maxit = 15;
-%!
-%! [x, flag, relres, iter, resvec] = cgs (@(x) A * x, b, rtol, maxit, M);
+%! [x, flag, relres, iter, resvec] = cgs (@(x) A * x, b, tol, maxit, M);
 %! assert (x, ones (size (b)), 1e-7);

 %!test
 %! n = 100;
-%! rtol = 1e-8;
+%! tol = 1e-8;
 %! a = sprand (n, n, .1);
 %! A = a'*a + 100 * eye (n);
 %! b = sum (A, 2);
-%! [x, flag, relres, iter, resvec] = cgs (A, b, rtol, [], diag (diag (A)));
+%! [x, flag, relres, iter, resvec] = cgs (A, b, tol, [], diag (diag (A)));
 %! assert (x, ones (size (b)), 1e-7);
+
+%!test
+%! n = 5;
+%! A = toeplitz (sparse ([2, 1, 0, 0, 0]), sparse ([2, -1, 0, 0, 0]));
+%! b = sum (A, 2);
+%! M = ones(n);
+%! [x, flag] = cgs (A, b, [], [], M);
+%! assert (flag, 2)
+
+%!test
+%! A = single (1);
+%! b = 1;
+%! [x, flag] = cgs (A, b);
+%! assert (class (x), "single")
+
+%!test
+%! A = 1;
+%! b = single (1);
+%! [x, flag] = cgs (A, b);
+%! assert (class (x), "single")
+
+%!test
+%! A = single (1);
+%! b = single (1);
+%! [x, flag] = cgs (A, b);
+%! assert (class (x), "single")
+
+%!test
+%!function y = Afun (x)
+%!   A = toeplitz ([2, 1, 0, 0], [2, -1, 0, 0]);
+%!   y = A * x;
+%!endfunction
+%! [x, flag] = cgs ("Afun", [1; 2; 2; 3]);
+%! assert (x, ones(4, 1), 1e-6)
+
+%!test
+%! ## test for a complex linear system
+%! A = toeplitz (sparse ([2, 1, 0, 0, 0]), sparse ([2, -1, 0, 0, 0])) + ...
+%! 1i * toeplitz (sparse ([2, 1, 0, 0, 0]), sparse ([2, -1, 0, 0, 0]));
+%! b = sum (A, 2);
+%! [x, flag] = cgs (A, b);
+%! assert (flag, 0)
+
+%!test # unpreconditioned residual
+%! A = toeplitz (sparse ([2, 1, 0, 0, 0]), sparse ([2, -1, 0, 0, 0]));
+%! b = sum (A, 2);
+%! M = magic (5);
+%! [x, flag, relres] = cgs (A, b, [], 3, M);
+%! assert (relres, norm (b - A * x) / norm (b), 8 * eps)
--- a/scripts/sparse/gmres.m	Fri Mar 09 13:44:08 2018 +0200
+++ b/scripts/sparse/gmres.m	Sun Mar 11 07:29:04 2018 +0900
@@ -1,4 +1,5 @@
 ## Copyright (C) 2009-2017 Carlo de Falco
+## Copyright (C) 2016-2017 Cristiano Dorigo
 ##
 ## This file is part of Octave.
 ##
@@ -17,36 +18,69 @@
 ## <https://www.gnu.org/licenses/>.

 ## -*- texinfo -*-
-## @deftypefn  {} {@var{x} =} gmres (@var{A}, @var{b}, @var{m}, @var{rtol}, @var{maxit}, @var{M1}, @var{M2}, @var{x0})
-## @deftypefnx {} {@var{x} =} gmres (@var{A}, @var{b}, @var{m}, @var{rtol}, @var{maxit}, @var{P})
-## @deftypefnx {} {[@var{x}, @var{flag}, @var{relres}, @var{iter}, @var{resvec}] =} gmres (@dots{})
+## @deftypefn  {} {@var{x} =} gmres (@var{A}, @var{b}, @var{restart}, @var{tol}, @var{maxit}, @var{M1}, @var{M2}, @var{x0}, @dots{})
+## @deftypefnx {} {@var{x} =} gmres (@var{A}, @var{b}, @var{restart}, @var{tol}, @var{maxit}, @var{M}, [], @var{x0}, @dots{})
+## @deftypefnx {} {[@var{x}, @var{flag}, @var{relres}, @var{iter}, @var{resvec}] =} gmres (@var{A}, @var{b}, @dots{})
 ## Solve @code{A x = b} using the Preconditioned GMRES iterative method with
-## restart, a.k.a. PGMRES(m).
+## restart, a.k.a. PGMRES(restart).
 ##
+## The input arguments are:
 ## @itemize @minus
-## @item @var{rtol} is the relative tolerance,
-## if not given or set to [] the default value 1e-6 is used.
+##
+## @item @var{A} is the matrix of the linear system and it must be square.
+## @var{A} can be passed as a matrix, function handle, or inline
+## function @code{Afun} such that @code{Afun(x) = A * x}. Additional
+## parameters to @code{Afun} are passed after @var{x0}.
+##
+## @item @var{b} is the right hand side vector. It must be a column vector
+## with the same numbers of rows as @var{A}.
+##
+## @item @var{restart} is the number of iterations before that the
+## method restarts. If it is [] or N = numel (b), then the restart
+## is not applied.
+##
+## @item @var{tol} is the required relative tolerance for the
+## preconditioned residual error,
+## @code{inv (@var{M}) * (@var{b} - @var{a} * @var{x})}. The iteration stops if
+## @code{norm (inv (@var{M}) * (@var{b} - @var{a} * @var{x})) <=
+## @var{tol} * norm (inv (@var{M}) * @var{B})}.  If @var{tol} is omitted or
+## empty, then a tolerance of 1e-6 is used.
 ##
 ## @item @var{maxit} is the maximum number of outer iterations, if not given or
-## set to [] the default value @code{min (10, numel (b) / restart)} is used.
+## set to [], then the default value @code{min (10, @var{N} / @var{restart})}
+## is used.
+## Note that, if @var{restart} is empty, then @var{maxit} is the maximum number
+## of iterations. If @var{restart} and @var{maxit} are not empty, then
+## the maximum number of iterations is @code{@var{restart} * @var{maxit}}.
+## If both @var{restart} and @var{maxit} are empty, then the maximum
+## number of iterations is set to @code{min (10, @var{N})}.
+##
+## @item @var{M1}, @var{M2} are the preconditioners. The preconditioner
+## @var{M} is given as @code{M = M1 * M2}. Both @var{M1} and @var{M2} can
+## be passed as a matrix, function handle, or inline function @code{g} such
+## that @code{g(x) = M1 \ x} or @code{g(x) = M2 \ x}. If @var{M1} is [] or not
+## given, then the preconditioner is not applied.
+## The technique used is the left-preconditioning, i.e., it is solved
+## @code{inv(@var{M}) * @var{A} * @var{x} = inv(@var{M}) * @var{b}} instead of
+## @code{@var{A} * @var{x} = @var{b}}.
 ##
 ## @item @var{x0} is the initial guess,
-## if not given or set to [] the default value @code{zeros (size (b))} is used.
+## if not given or set to [], then the default value
+## @code{zeros (size (@var{b}))} is used.
 ##
-## @item @var{m} is the restart parameter,
-## if not given or set to [] the default value @code{numel (b)} is used.
 ## @end itemize
 ##
-## Argument @var{A} can be passed as a matrix, function handle, or inline
-## function @code{f} such that @code{f(x) = A*x}.
+## The arguments which follow @var{x0} are treated as parameters, and passed in
+## a proper way to any of the functions (@var{A} or @var{M} or
+## @var{M1} or @var{M2}) which are passed to @code{gmres}.
 ##
-## The preconditioner @var{P} is given as @code{P = M1 * M2}.  Both @var{M1}
-## and @var{M2} can be passed as a matrix, function handle, or inline function
-## @code{g} such that @code{g(x) = M1\x} or @code{g(x) = M2\x}.
-##
-## Besides the vector @var{x}, additional outputs are:
+## The outputs are:
 ##
 ## @itemize @minus
+##
+## @item @var{x} the computed approximation. If the method does not
+## converge, then it is the iterated with minimum residual.
+##
 ## @item @var{flag} indicates the exit status:
 ##
 ## @table @asis
@@ -54,182 +88,555 @@
 ##
 ## @item 1 : maximum number of iterations exceeded
 ##
-## @item 2 : unused, but skipped for compatibility
+## @item 2 : the preconditioner matrix is singular
 ##
-## @item 3 : algorithm reached stagnation (no change between iterations)
+## @item 3 : algorithm reached stagnation (the relative difference between two
+## consecutive iterations is less than eps)
 ## @end table
 ##
-## @item @var{relres} is the final value of the relative residual.
+## @item @var{relres} is the value of the relative preconditioned
+## residual of the approximation @var{x}.
 ##
 ## @item @var{iter} is a vector containing the number of outer iterations and
-## total iterations performed.
+## inner iterations performed to compute @var{x}. That is:
+##
+## @itemize
+## @item @var{iter(1)}: number of outer iterations, i.e. how many
+## times the method restarted. (if @var{restart} is empty or @var{N},
+## then it is 1, if not 1 <= @var{iter(1)} <= @var{maxit}).
 ##
-## @item @var{resvec} is a vector containing the relative residual at each
-## iteration.
+## @item @var{iter(2)}: the number of iterations performed before the
+## restart,  i.e., the method restarts when
+## @code{@var{iter(2)} = @var{restart}}. If @var{restart} is empty or
+## @var{N}, then 1 <= @var{iter(2)} <= @var{maxit}.
+## @end itemize
+##
+## To be more clear, the approximation @var{x} is computed at the iteration
+## @code{(@var{iter(1)} - 1) * @var{restart} + @var{iter(2)}}.
+## Since the output @var{x} corresponds to the minimal preconditioned
+## residual solution, the total number of iterations that
+## the method performed is given by @code{length (resvec) - 1}.
+##
+## @item @var{resvec} is a vector containing the preconditioned
+## relative residual at each iteration, including the 0-th iteration
+## @code{norm (@var{A} * @var{x0} - @var{b})}.
 ## @end itemize
 ##
-## @seealso{bicg, bicgstab, cgs, pcg, pcr, qmr}
+## Let us consider a trivial problem with a tridiagonal matrix
+##
+## @example
+## @group
+## n = 20;
+## A = toeplitz (sparse ([1, 1], [1, 2], [2, 1] * n ^ 2, 1, n))  + ...
+##     toeplitz (sparse (1, 2, -1, 1, n) * n / 2, ...
+##     sparse (1, 2, 1, 1, n) * n / 2);
+## b = A * ones (n, 1);
+## restart = 5;
+## [M1, M2] = ilu (A); # in this tridiag case, it corresponds to lu (A)
+## M = M1 * M2;
+## Afun = @@(x) A * x;
+## Mfun = @@(x) M \ x;
+## M1fun = @@(x) M1 \ x;
+## M2fun = @@(x) M2 \ x;
+## @end group
+## @end example
+##
+## @sc{Example 1:} simplest usage of @code{gmres}
+##
+## @example
+## x = gmres (A, b, [], [], n)
+## @end example
+##
+## @sc{Example 2:} @code{gmres} with a function which computes
+## @code{@var{A} * @var{x}}
+##
+## @example
+## x = gmres (Afun, b, [], [], n)
+## @end example
+##
+## @sc{Example 3:} usage of @code{gmres} with the restart
+##
+## @example
+## x = gmres (A, b, restart);
+## @end example
+##
+## @sc{Example 4:} @code{gmres} with a preconditioner matrix @var{M}
+## with and without restart
+## @example
+## @group
+## x = gmres (A, b, [], 1e-06, n, M)
+## x = gmres (A, b, restart, 1e-06, n, M)
+## @end group
+## @end example
+##
+## @sc{Example 5:} @code{gmres} with a function as preconditioner
+##
+## @example
+## x = gmres (Afun, b, [], 1e-6, n, Mfun)
+## @end example
+##
+## @sc{Example 6:} @code{gmres} with preconditioner matrices @var{M1}
+## and @var{M2}
+##
+## @example
+## x = gmres (A, b, [], 1e-6, n, M1, M2)
+## @end example
+##
+## @sc{Example 7:} @code{gmres} with functions as preconditioners
+##
+## @example
+## x = gmres (Afun, b, 1e-6, n, M1fun, M2fun)
+## @end example
+##
+## @sc{Example 8:} @code {gmres} with as input a function requiring an argument
+##
+## @example
+## @group
+##   function y = Ap (A, x, p) # compute A^p * x
+##      y = x;
+##      for i = 1:p
+##        y = A * y;
+##      endfor
+##   endfunction
+## Apfun = @@(x, p) Ap (A, x, p);
+## x = gmres (Apfun, b, [], [], [], [], [], [], 2);
+## @end group
+## @end example
+##
+## @sc{Example 9:} explicit example to show that @code{gmres} uses a
+## left preconditioner
+##
+## @example
+## @group
+## [M1, M2] = ilu (A + 0.1 * eye (n)); # factorization of A perturbed
+## M = M1 * M2;
+##
+## ## reference solution computed by gmres after two iterations
+## [x_ref, fl] = gmres (A, b, [], [], 1, M)
+##
+## ## left preconditioning
+## [x, fl] = gmres (M \ A, M \ b, [], [], 1)
+## x # compare x and x_ref
+##
+## @end group
+## @end example
+##
+## References:
+##
+## @enumerate
+##
+## @item @nospell{Y. Saad}, @cite{Iterative Methods for Sparse Linear
+## Systems}, Second edition, 2003, SIAM
+##
+## @end enumerate
+## @seealso{bicg, bicgstab, cgs, pcg, pcr, qmr, tfqmr}
 ## @end deftypefn

-function [x, flag, relres, it, resvec] = gmres (A, b, restart, rtol, maxit, M1, M2, x0)
-
-  if (nargin < 2 || nargin > 8)
-    print_usage ();
-  endif

-  if (ischar (A))
-    Ax = str2func (A);
-  elseif (isnumeric (A) && issquare (A))
-    Ax = @(x) A*x;
-  elseif (isa (A, "function_handle"))
-    Ax = A;
+function [x_min, flag, relres, it, resvec] = ...
+         gmres (A, b, restart = [], tol = [], maxit = [], M1 = [],
+                M2 = [], x0 = [], varargin)
+
+  if (strcmp (class (A), "single") || strcmp (class (b), "single"))
+    class_name = "single";
   else
-    error ("gmres: A must be a function or square matrix");
-  endif
-
-  if (nargin < 3 || isempty (restart))
-    restart = rows (b);
-  endif
-
-  if (nargin < 4 || isempty (rtol))
-    rtol = 1e-6;
-  endif
-
-  if (nargin < 5 || isempty (maxit))
-    maxit = min (rows (b)/restart, 10);
+    class_name = "double";
   endif

-  if (nargin < 6 || isempty (M1))
-    M1m1x = @(x) x;
-  elseif (ischar (M1))
-    M1m1x = str2func (M1);
-  elseif (isnumeric (M1) && ismatrix (M1))
-    M1m1x = @(x) M1 \ x;
-  elseif (isa (M1, "function_handle"))
-    M1m1x = M1;
-  else
-    error ("gmres: preconditioner M1 must be a function or matrix");
-  endif
+  [Afun, M1fun, M2fun] = __alltohandles__ (A, b, M1, M2, "gmres");
+
+  ## Check if the inputs are empty, and in case set them
+  [tol, x0] = __default__input__ ({1e-06, zeros(size (b))}, tol, x0);

-  if (nargin < 7 || isempty (M2))
-    M2m1x = @(x) x;
-  elseif (ischar (M2))
-    M2m1x = str2func (M2);
-  elseif (isnumeric (M2) && ismatrix (M2))
-    M2m1x = @(x) M2 \ x;
-  elseif (isa (M2, "function_handle"))
-    M2m1x = M2;
-  else
-    error ("gmres: preconditioner M2 must be a function or matrix");
+  empty_restart = isempty (restart);
+  empty_maxit = isempty (maxit);
+  size_b = rows (b);
+
+  if (tol >= 1)
+    warning("Input tol is bigger than 1. \n Try to use a smaller tolerance.");
+  elseif (tol <= eps / 2)
+    warning("Input tol may not be achievable by gmres. \n Try to use a bigger tolerance.");
   endif

-  Pm1x = @(x) M2m1x (M1m1x (x));
+  ## This big "if block" is to set maxit and restart in the proper way

-  if (nargin < 8 || isempty (x0))
-    x0 = zeros (size (b));
+  if ((empty_restart) && (empty_maxit))
+    restart = size_b;
+    maxit = 1;
+    max_iter_number = min (size_b, 10);
+  elseif (restart <= 0) || (maxit <= 0)
+    error ("gmres: MAXIT and RESTART must be positive integers")
+  elseif (restart < size_b) && (empty_maxit)
+    maxit = min (size_b / restart, 10);
+    max_iter_number = maxit * restart;
+  elseif (restart == size_b) && (empty_maxit)
+    maxit = 1;
+    max_iter_number = min (size_b, 10);
+  elseif (restart > size_b) && (empty_maxit)
+    warning ("RESTART is %d but it should be bounded by SIZE(A,2).\n Setting restart to %d. \n", restart, size_b)
+    restart = size_b;
+    maxit = 1;
+    max_iter_number = restart;
+  elseif (empty_restart) && (maxit <= size_b)
+    restart = size_b;
+    max_iter_number = maxit;
+  elseif (empty_restart) && (maxit > size_b)
+    warning ("MAXIT is %d but it should be bounded by SIZE(A,2). \n Setting MAXIT to %d", maxit, size_b);
+    restart = size_b;
+    maxit = size_b;
+    max_iter_number = size_b;
+  elseif (restart > size_b) && (!empty_maxit)
+    warning ("RESTART is %d but it should be bounded by SIZE(A,2).\n Setting restart to %d. \n", restart, size_b)
+    restart = size_b;
+    max_iter_number = restart * maxit;
+  elseif (restart == size_b) && (maxit <= size_b)
+    max_iter_number = maxit;
+  else
+    max_iter_number = restart*maxit;
   endif

-  x_old = x0;
-  x = x_old;
-  prec_res = Pm1x (b - Ax (x_old));
-  presn = norm (prec_res, 2);
+  prec_b_norm = norm (b, 2);
+  if (prec_b_norm == 0)
+    if (nargout < 2)
+      printf("The right hand side vector is all zero so gmres\nreturned an all zero solution without iterating.\n")
+    endif
+    x_min = b;
+    flag = 0;
+    relres = 0;
+    resvec = 0;
+    it = [0, 0];
+    return
+  endif

+  ## gmres: function handle case
+
+  x_old = x_pr = x_min = x = x0;
   B = zeros (restart + 1, 1);
-  V = zeros (rows (x), restart);
+  V = zeros (rows (x), restart, class_name);
   H = zeros (restart + 1, restart);

+  iter = 1; # total number of iterations
+  iter_min = 0; # iteration with minimum residual
+  outer_it = 1; # number of outer iterations
+  restart_it  =  1; # number of inner iterations
+  it = zeros(1, 2);
+  resvec = zeros (max_iter_number + 1, 1);
+  flag = 1; # Default flag is maximum # of iterations exceeded
+
   ## begin loop
-  iter = 1;
-  restart_it  = restart + 1;
-  resvec      = zeros (maxit, 1);
-  resvec(1)   = presn;
-  prec_b_norm = norm (Pm1x (b), 2);
-  flag        = 1;  # Default flag is maximum # of iterations exceeded
+  u = feval (Afun, x_old, varargin{:});
+  try
+    warning("error", "Octave:singular-matrix", "local")
+    prec_res = feval (M1fun, b - u, varargin{:}); # M1*(b-u)
+    prec_res = feval (M2fun, prec_res, varargin{:});
+    presn = norm (prec_res, 2);
+    resvec(1) = presn;
+    z = feval (M1fun, b, varargin{:});
+    z = feval (M2fun, z, varargin{:});
+    prec_b_norm = norm (z, 2);
+    B (1) = presn;
+    V(:, 1) = prec_res / presn;
+  catch
+    flag = 2;
+  end_try_catch

-  while (iter <= maxit * restart && presn > rtol * prec_b_norm)
-
+  while (flag != 2) && (iter <= max_iter_number) && ...
+        (presn > tol * prec_b_norm)
     ## restart
     if (restart_it > restart)
       restart_it = 1;
+      outer_it += 1;
       x_old = x;
-      prec_res = Pm1x (b - Ax (x_old));
+      u = feval (Afun, x_old, varargin{:});
+      prec_res = feval (M1fun, b - u, varargin{:});
+      prec_res = feval (M2fun, prec_res, varargin{:});
       presn = norm (prec_res, 2);
       B(1) = presn;
       H(:) = 0;
       V(:, 1) = prec_res / presn;
     endif
-
     ## basic iteration
-    tmp = Pm1x (Ax (V(:, restart_it)));
-    [V(:,restart_it+1), H(1:restart_it+1, restart_it)] = ...
-        mgorth (tmp, V(:,1:restart_it));
-
-    Y = (H(1:restart_it+1, 1:restart_it) \ B(1:restart_it+1));
-
-    little_res = B(1:restart_it+1) - ...
-        H(1:restart_it+1, 1:restart_it) * Y(1:restart_it);
-
+    u = feval (Afun, V(:, restart_it), varargin{:});
+    tmp = feval (M1fun, u, varargin{:});
+    tmp = feval (M2fun, tmp, varargin{:});
+    [V(:,restart_it + 1), H(1:restart_it + 1, restart_it)] = ...
+    mgorth (tmp, V(:,1:restart_it));
+    Y = (H(1:restart_it + 1, 1:restart_it) \ B(1:restart_it + 1));
+    little_res = B(1:restart_it + 1) - ...
+                 H(1:restart_it + 1, 1:restart_it) * Y(1:restart_it);
     presn = norm (little_res, 2);
-
     x = x_old + V(:, 1:restart_it) * Y(1:restart_it);
-
-    resvec(iter+1) = presn;
-    if (norm (x - x_old, inf) <= eps)
-      flag = 3;  # Stagnation: no change between iterations
+    resvec(iter + 1) = presn;
+    if (norm (x - x_pr) <= eps*norm (x))
+      flag = 3; # Stagnation: little change between iterations
       break;
     endif
-
+    if (resvec (iter + 1) <= resvec (iter_min + 1))
+      x_min = x;
+      iter_min = iter;
+      it = [outer_it, restart_it];
+    endif
+    x_pr = x;
     restart_it += 1;
     iter += 1;
   endwhile

-  if (nargout > 1)
-    ## Calculate extra outputs as requested
-    relres = presn / prec_b_norm;
-    if (relres <= rtol)
-      flag = 0;  # Converged to solution within tolerance
-    endif
+  if (flag == 2)
+    resvec = norm (b);
+    relres = 1;
+  else
+    resvec = resvec (1:iter);
+    relres = resvec (iter) / prec_b_norm;
+  endif

-    it = [floor(iter/restart), restart_it-1];
+  if ((relres <= tol) && (flag == 1))
+    flag = 0;  # Converged to solution within tolerance
   endif

+  if ((nargout < 2) && (restart != size_b)) # restart applied
+    switch (flag)
+      case {0} # gmres converged
+        printf ("gmres(%d) converged at outer iteration %d (inner iteration %d) ",restart, it (1), it (2));
+        printf ("to a solution with relative residual %d \n", relres);
+      case {1} # max number of iteration reached
+        printf ("gmres(%d) stopped at outer iteration %d (inner iteration %d) ", restart, outer_it, restart_it-1);
+        printf ("without converging to the desired tolerance %d ", tol);
+        printf ("because the maximum number of iterations was reached \n");
+        printf ("The iterated returned (number %d(%d)) ", it(1), it(2));
+        printf ("has relative residual %d \n", relres);
+      case {2} # preconditioner singular
+        printf ("gmres(%d) stopped at outer iteration %d (inner iteration %d) ",restart, outer_it, restart_it-1);
+        printf ("without converging to the desired tolerance %d ", tol);
+        printf ("because the preconditioner matrix is singular \n");
+        printf ("The iterated returned (number %d(%d)) ", it(1), it(2));
+        printf ("has relative residual %d \n", relres);
+      case {3} # stagnation
+        printf ("gmres(%d) stopped at outer iteration %d (inner iteration %d) ", restart, outer_it, restart_it - 1);
+        printf ("without converging to the desired tolerance %d", tol);
+        printf ("because it stagnates. \n");
+        printf ("The iterated returned (number %d(%d)) ", it(1), it(2));
+        printf ("has relative residual %d \n", relres);
+    endswitch
+  elseif ((nargout < 2) && (restart == size_b)) # no restart
+    switch (flag)
+      case {0} # gmres converged
+        printf ("gmres converged at iteration %d ", it(2));
+        printf ("to a solution with relative residual %d \n", relres);
+      case {1} # max number of iteration reached
+        printf ("gmres stopped at iteration %d ", restart_it - 1);
+        printf ("without converging to the desired tolerance %d ", tol);
+        printf ("because the maximum number of iterations was reached \n");
+        printf ("The iterated returned (number %d) ", it(2));
+        printf ("has relative residual %d \n", relres);
+      case {2} # preconditioner ill-conditioned
+        printf ("gmres stopped at iteration %d ", restart_it - 1);
+        printf ("without converging to the desired tolerance %d ", tol);
+        printf ("because the preconditioner matrix is singular \n")
+        printf ("The iterated returned (number %d) ", it (2));
+        printf ("has relative residual %d \n", relres);
+      case {3} # stagnation
+        printf ("gmres stopped at iteration %d ", restart_it - 1);
+        printf ("without converging at the desired tolerance %d ", tol);
+        printf ("because it stagnates\n");
+        printf ("The iterated returned (number %d) ", it(2));
+        printf ("has relative residual %d \n", relres);
+    endswitch
+  endif
 endfunction

-
 %!demo
 %! dim = 20;
 %! A = spdiags ([-ones(dim,1) 2*ones(dim,1) ones(dim,1)], [-1:1], dim, dim);
 %! b = ones (dim, 1);
-%! [x, flag, relres, iter, resvec] = gmres (A, b, 10, 1e-10, dim, @(x) x ./ diag (A), [], b)
+%! [x, flag, relres, iter, resvec] = ...
+%! gmres (A, b, 10, 1e-10, dim, @(x) x ./ diag (A), [], b)
+
+%!demo # simplest use
+%! n = 20;
+%! A = toeplitz (sparse ([1, 1], [1, 2], [2, 1] * n ^ 2, 1, n))  + ...
+%!     toeplitz (sparse (1, 2, -1, 1, n) * n / 2, ...
+%!     sparse (1, 2, 1, 1, n) * n / 2);
+%! b = A * ones (n, 1);
+%! restart = 5;
+%! [M1, M2] = ilu (A + 0.1 * eye (n));
+%! M = M1 * M2;
+%! x = gmres (A, b, [], [], n);
+%! x = gmres (A, b, restart, [], n); # gmres with restart
+%! Afun = @(x) A * x;
+%! x = gmres (Afun, b, [], [], n);
+%! x = gmres (A, b,[], 1e-6, n, M); # gmres without restart
+%! x = gmres (A, b, [], 1e-6, n, M1, M2);
+%! Mfun = @(x) M \ x;
+%! x = gmres (Afun, b, [], 1e-6, n, Mfun);
+%! M1fun = @(x) M1 \ x;
+%! M2fun = @(x) M2 \ x;
+%! x = gmres (Afun, b, [], 1e-6, n, M1fun, M2fun);
+%! function y = Ap (A, x, p) # compute A^p * x
+%!    y = x;
+%!    for i = 1:p
+%!      y = A * y;
+%!    endfor
+%!  endfunction
+%! Afun = @(x, p) Ap (A, x, p);
+%! x = gmres (Afun, b, [], [], n, [], [], [], 2); # solution of A^2 * x = b

-%!shared A, b, dim
+%!demo
+%! n = 10;
+%! A = toeplitz (sparse ([1, 1], [1, 2], [2, 1] * n ^ 2, 1, n))  + ...
+%!     toeplitz (sparse (1, 2, -1, 1, n) * n / 2, ...
+%!     sparse (1, 2, 1, 1, n) * n / 2);
+%! b = A * ones (n, 1);
+%! [M1, M2] = ilu (A + 0.1 * eye (n)); # factorization of A perturbed
+%! M = M1 * M2;
+%!
+%! ## reference solution computed by gmres after one iteration
+%! [x_ref, fl] = gmres (A, b, [], [], 1, M);
+%! x_ref
+%!
+%! ## left preconditioning
+%! [x, fl] = gmres ( M \ A, M \ b, [], [], 1);
+%! x # compare x and x_ref
+
+%!test
+%! ## Check that all type of inputs work
+%! A = toeplitz (sparse ([2, 1, 0, 0, 0]), sparse ([2, -1, 0, 0, 0]));
+%! b = sum (A, 2);
+%! M1 = diag (sqrt (diag (A)));
+%! M2 = M1;
+%! Afun = @(z) A * z;
+%! M1_fun = @(z) M1 \ z;
+%! M2_fun = @(z) M2 \ z;
+%! [x, flag] = gmres (A, b);
+%! assert (flag, 0);
+%! [x, flag] = gmres (A, b, [], [], [], M1, M2);
+%! assert (flag, 0);
+%! [x, flag] = gmres (A, b, [], [], [], M1_fun, M2_fun);
+%! assert (flag, 0);
+%! [x, flag] = gmres (A, b, [], [], [], M1_fun, M2);
+%! assert (flag, 0);
+%! [x, flag] = gmres (A, b, [], [], [], M1, M2_fun);
+%! assert (flag, 0);
+%! [x, flag] = gmres (Afun, b);
+%! assert (flag, 0);
+%! [x, flag] = gmres (Afun, b, [],[],[], M1, M2);
+%! assert (flag, 0);
+%! [x, flag] = gmres (Afun, b, [],[],[], M1_fun, M2);
+%! assert (flag, 0);
+%! [x, flag] = gmres (Afun, b, [],[],[], M1, M2_fun);
+%! assert (flag, 0);
+%! [x, flag] = gmres (Afun, b, [],[],[], M1_fun, M2_fun);
+%! assert (flag, 0);
+
+%!test
 %! dim = 100;
-%!test
 %! A = spdiags ([-ones(dim,1) 2*ones(dim,1) ones(dim,1)], [-1:1], dim, dim);
 %! b = ones (dim, 1);
-%! x = gmres (A, b, 10, 1e-10, dim, @(x) x ./ diag (A), [], b);
+%! [x, flag] = gmres (A, b, 10, 1e-10, dim, @(x) x ./ diag (A), [], b);
 %! assert (x, A\b, 1e-9*norm (x, Inf));
-%!
+%! [x, flag] = gmres (A, b, dim, 1e-10, 1e4, @(x) diag (diag (A)) \ x, [], b);
+%! assert(x, A\b, 1e-7*norm (x, Inf));
+
 %!test
-%! x = gmres (A, b, dim, 1e-10, 1e4, @(x) diag (diag (A)) \ x, [], b);
-%! assert(x, A\b, 1e-7*norm (x, Inf));
-%!
-%!test
-%! A = spdiags ([[1./(2:2:2*(dim-1)) 0]; 1./(1:2:2*dim-1); [0 1./(2:2:2*(dim-1))]]', -1:1, dim, dim);
+%! dim = 100;
+%! A = spdiags ([[1./(2:2:2*(dim-1)) 0]; 1./(1:2:2*dim-1); ...
+%! [0 1./(2:2:2*(dim-1))]]', -1:1, dim, dim);
 %! A = A'*A;
 %! b = rand (dim, 1);
-%! [x, resvec] = gmres (@(x) A*x, b, dim, 1e-10, dim, @(x) x./diag (A), [], []);
+%! [x, resvec] = gmres (@(x) A*x, b, dim, 1e-10, dim,...
+%!                      @(x) x./diag (A), [], []);
 %! assert (x, A\b, 1e-9*norm (x, Inf));
-%! x = gmres (@(x) A*x, b, dim, 1e-10, 1e6, @(x) diag (diag (A)) \ x, [], []);
+%! [x, flag] = gmres (@(x) A*x, b, dim, 1e-10, 1e6,...
+%!                    @(x) diag (diag (A)) \ x, [], []);
 %! assert (x, A\b, 1e-9*norm (x, Inf));
-%!test
-%! x = gmres (@(x) A*x, b, dim, 1e-10, 1e6, @(x) x ./ diag (A), [], []);
+%! [x, flag] = gmres (@(x) A*x, b, dim, 1e-10, 1e6,...
+%!                    @(x) x ./ diag (A), [], []);
 %! assert (x, A\b, 1e-7*norm (x, Inf));

+%!test
+%! ## gmres solves complex linear systems
+%! A = toeplitz (sparse ([2, 1, 0, 0, 0]), sparse ([2, -1, 0, 0, 0])) + ...
+%! 1i * toeplitz (sparse ([2, 1, 0, 0, 0]), sparse ([2, -1, 0, 0, 0]));
+%! b = sum (A, 2);
+%! [x, flag] = gmres(A, b, [], [], 5);
+%! assert (flag, 0);
+%! assert (x, ones (5, 1), -1e-06)

-%!error gmres (1)
-%!error gmres (1,2,3,4,5,6,7,8,9)
-%!error <A must be> gmres ({1},2)
-%!error <A must be a function or square matrix> gmres ({1},2)
-%!error <M1 must be a function or matrix> gmres (1,2,3,4,5,{6})
-%!error <M2 must be a function or matrix> gmres (1,2,3,4,5,6,{7})
+%!test
+%! ## Maximum number of iteration reached
+%! A = hilb (100);
+%! b = sum (A, 2);
+%! [x, flag, relres, iter] = gmres (A, b, [], 1e-14);
+%! assert(flag, 1);
+
+%!test
+%! ## gmres recognizes that the preconditioner matrix is singular
+%! AA = 2 * eye (3);
+%! bb = ones (3, 1);
+%! I = eye (3);
+%! M = [1 0 0; 0 1 0; 0 0 0]; # the last row is zero
+%! [x, flag] = gmres(@(y) AA * y, bb, [], [], [], @(y) M \ y, @(y) y);
+%! assert (flag, 2)
+
+%!test
+%! A = rand (4);
+%! A = A' * A;
+%! [x, flag] = gmres (A, zeros (4, 1), [], [], [], [], [], ones (4, 1));
+%! assert (x, zeros (4, 1))
+
+%!test
+%! A = rand (4);
+%! b = zeros (4, 1);
+%! [x, flag, relres, iter] = gmres (A, b);
+%! assert (relres, 0)
+
+%!test
+%! A = toeplitz (sparse ([2, 1, 0, 0, 0]), sparse ([2, -1, 0, 0, 0]));
+%! b = A * ones (5, 1);
+%! [x, flag, relres, iter] = gmres (A, b, [], [], [], [], [], ...
+%! ones (5, 1) + 1e-8);
+%! assert (iter, [0, 0])
+
+%!test
+%! A = rand (20);
+%! b = A * ones (20, 1);
+%! [x, flag, relres, iter, resvec] = gmres (A, b, [], [], 1);
+%! assert (iter, [1, 1])
+
+%!test
+%! A = hilb (20);
+%! b = A * ones (20, 1);
+%! [x, flag, relres, iter, resvec] = gmres (A, b ,5, 1e-14);
+%! assert (iter, [4, 5])
+
+%!test
+%! A = single (1);
+%! b = 1;
+%! [x, flag] = gmres (A, b);
+%! assert (class (x), "single")
+
+%!test
+%! A = 1;
+%! b = single (1);
+%! [x, flag] = gmres (A, b);
+%! assert (class (x), "single")
+
+%!test
+%! A = single (1);
+%! b = single (1);
+%! [x, flag] = gmres (A, b);
+%! assert (class (x), "single")
+
+%!test
+%!function y = Afun (x)
+%!   A = toeplitz ([2, 1, 0, 0], [2, -1, 0, 0]);
+%!   y = A * x;
+%!endfunction
+%! [x, flag] = gmres ("Afun", [1; 2; 2; 3]);
+%! assert (x, ones(4, 1), 1e-6)
+
+%!test # preconditioned residual
+%! A = toeplitz (sparse ([2, 1, 0, 0, 0]), sparse ([2, -1, 0, 0, 0]));
+%! b = sum (A, 2);
+%! M = magic (5);
+%! [x, flag, relres] = gmres (A, b, [], [], 2, M);
+%! assert (relres, norm (M \ (b - A * x)) / norm (M \ b), 8 * eps)
--- a/scripts/sparse/ilu.m	Fri Mar 09 13:44:08 2018 +0200
+++ b/scripts/sparse/ilu.m	Sun Mar 11 07:29:04 2018 +0900
@@ -167,11 +167,11 @@
   endif

   if (! (issparse (A) && issquare (A)))
-    error ("ichol: A must be a sparse square matrix");
+    error ("ilu: A must be a sparse square matrix");
   endif

   if (! isstruct (opts))
-    error ("ichol: OPTS must be a structure.");
+    error ("ilu: OPTS must be a structure.");
   endif

   ## If A is empty then return empty L, U and P for Matlab compatibility
--- a/scripts/sparse/module.mk	Fri Mar 09 13:44:08 2018 +0200
+++ b/scripts/sparse/module.mk	Sun Mar 11 07:29:04 2018 +0900
@@ -3,7 +3,9 @@
   %reldir%/private

 %canon_reldir%_PRIVATE_FCN_FILES = \
-  %reldir%/private/__sprand__.m
+  %reldir%/private/__sprand__.m \
+  %reldir%/private/__alltohandles__.m \
+  %reldir%/private/__default__input__.m

 %canon_reldir%_FCN_FILES = \
   %reldir%/bicg.m \
@@ -32,6 +34,7 @@
   %reldir%/spstats.m \
   %reldir%/spy.m \
   %reldir%/svds.m \
+  %reldir%/tfqmr.m \
   %reldir%/treelayout.m \
   %reldir%/treeplot.m
--- a/scripts/sparse/pcg.m	Fri Mar 09 13:44:08 2018 +0200
+++ b/scripts/sparse/pcg.m	Sun Mar 11 07:29:04 2018 +0900
@@ -1,4 +1,5 @@
 ## Copyright (C) 2004-2017 Piotr Krzyzanowski
+## Copyright (C) 2016-2017 Cristiano Dorigo
 ##
 ## This file is part of Octave.
 ##
@@ -17,55 +18,62 @@
 ## <https://www.gnu.org/licenses/>.

 ## -*- texinfo -*-
-## @deftypefn  {} {@var{x} =} pcg (@var{A}, @var{b})
-## @deftypefnx {} {@var{x} =} pcg (@var{A}, @var{b}, @var{tol})
-## @deftypefnx {} {@var{x} =} pcg (@var{A}, @var{b}, @var{tol}, @var{maxit})
-## @deftypefnx {} {@var{x} =} pcg (@var{A}, @var{b}, @var{tol}, @var{maxit}, @var{M})
-## @deftypefnx {} {@var{x} =} pcg (@var{A}, @var{b}, @var{tol}, @var{maxit}, @var{M1}, @var{M2})
-## @deftypefnx {} {@var{x} =} pcg (@var{A}, @var{b}, @var{tol}, @var{maxit}, @var{M1}, @var{M2}, @var{x0})
-## @deftypefnx {} {@var{x} =} pcg (@var{Afun}, @var{b}, @var{tol}, @var{maxit}, @var{M1}, @var{M2}, @var{x0}, @dots{})
-## @deftypefnx {} {[@var{x}, @var{flag}, @var{relres}, @var{iter}, @var{resvec}, @var{eigest}] =} pcg (@dots{})
+## @deftypefn {} {@var{x} =} pcg (@var{A}, @var{b}, @var{tol}, @var{maxit}, @var{m1}, @var{m2}, @var{x0}, @dots{})
+## @deftypefnx {} {@var{x} =} pcg (@var{A}, @var{b}, @var{tol}, @var{maxit}, @var{M}, [], @var{x0}, @dots{})
+## @deftypefnx {} {[@var{x}, @var{flag}, @var{relres}, @var{iter}, @var{resvec}, @var{eigest}] =} pcg (@var{A}, @var{b}, @dots{})
 ##
-## Solve the linear system of equations @w{@code{@var{A}*@var{x} = @var{b}}}
+## Solve the linear system of equations @w{@code{@var{A} * @var{x} = @var{b}}}
 ## by means of the Preconditioned Conjugate Gradient iterative method.
 ##
-## The input arguments are
+## The input arguments are:
 ##
 ## @itemize
-## @item
-## @var{A} can either be a square (preferably sparse) matrix or a function
-## handle, inline function, or string containing the name of a function that
-## computes @w{@code{@var{A}*@var{x}}}.  In principle, @var{A} should be
-## symmetric and positive definite; if @code{pcg} finds that @var{A} is not
-## positive definite then a warning is printed and the @var{flag} output will
-## be set to 3.
+## @item @var{A} is the matrix of the linear system and it must be square.
+## @var{A} can be passed as a matrix, function handle, or inline
+## function @code{Afun} such that @code{Afun(x) = A * x}.  Additional
+## parameters to @code{Afun} are passed after @var{x0}.
+##
+## @var{A} has to be Hermitian and Positive Definite (HPD).  If
+## @code{pcg} detects @var{A} not to be positive definite, a warning
+## is printed and the @var{flag} output is set.
 ##
 ## @item
 ## @var{b} is the right-hand side vector.
 ##
 ## @item
 ## @var{tol} is the required relative tolerance for the residual error,
-## @w{@code{@var{b} - @var{A}*@var{x}}}.  The iteration stops if
-## @w{@code{norm (@var{b} - @var{A}*@var{x})}} @leq{}
-## @w{@code{@var{tol} * norm (@var{b})}}.
-## If @var{tol} is omitted or empty @code{[]} then a tolerance of 1e-6 is used.
+## @w{@code{@var{b} - @var{A} * @var{x}}}.  The iteration stops if
+## @w{@code{norm (@var{b} - @var{A} * @var{x})} @leq{}
+## @w{@code{@var{tol} * norm (@var{b})}}}.
+## If @var{tol} is omitted or empty, then a tolerance of 1e-6 is used.
 ##
 ## @item
-## @var{maxit} is the maximum allowable number of iterations.  If @var{maxit}
-## is omitted or empty then a value of @code{min (rows (@var{A}), 20)} is used.
+## @var{maxit} is the maximum allowed number of iterations; if @var{maxit}
+## is omitted or empty then a value of 20 is used.
 ##
 ## @item
-## @var{M} = @var{M1} * @var{M2} is the (left) preconditioning matrix, such
-## that the iteration is (theoretically) equivalent to solving
-## @w{@code{@var{P} * @var{x} = @var{M} \ @var{b}}}, with
-## @w{@code{@var{P} = @var{M} \ @var{A}}}.
-## Note that a proper choice of the preconditioner can dramatically improve the
-## overall performance of the method.  Instead of matrices @var{M1} and
-## @var{M2}, the user may pass two functions which return the results of
-## applying the inverse of @var{M1} and @var{M2} to a vector (usually this is
-## the preferred way of using the preconditioner).  If @var{M1} is omitted or
-## empty then no preconditioning is applied.  If @var{M2} is omitted,
-## @var{M} = @var{M1} will be used as a preconditioner.
+## @var{m} is a HPD preconditioning matrix. For any decomposition
+## @code{@var{m} = @var{p1} * @var {p2}} such that
+## @w{@code{inv (@var{p1}) * @var{A} * inv (@var{p2})}} is HPD, the
+## conjugate gradient method is formally applied to the linear  system
+## @w{@code{inv (@var{p1}) * @var{A} * inv (@var{p2}) * @var{y} = inv
+## (@var{p1}) * @var{b}}},
+## with @code{@var{x} = inv (@var{p2}) * @var{y}} (split preconditioning).
+## In practice, at each iteration of the conjugate gradient method a
+## linear system with matrix @var{m} is solved with @code{mldivide}.
+## If a particular factorization
+## @code{@var{m} = @var{m1} * @var{m2}} is available (for instance, an
+## incomplete Cholesky factorization of @var{a}), the two matrices
+## @var{m1} and @var{m2} can be passed and the relative linear systems
+## are solved with the @code{mldivide} operator.
+## Note that a proper choice of the preconditioner may dramatically improve
+## the overall performance of the method.  Instead of matrices @var{m1} and
+## @var{m2}, the user may pass two functions which return the results of
+## applying the inverse of @var{m1} and @var{m2} to a vector.
+## If @var{m1} is omitted or empty @code{[]}, then no preconditioning
+## is applied. If no factorization of @var{m} is available, @var{m2}
+## can be omitted or left [], and the input variable @var{m1} can be
+## used to pass the preconditioner @var{m}.
 ##
 ## @item
 ## @var{x0} is the initial guess.  If @var{x0} is omitted or empty then the
@@ -73,28 +81,31 @@
 ## @end itemize
 ##
 ## The arguments which follow @var{x0} are treated as parameters, and passed in
-## a proper way to any of the functions (@var{Afun} or @var{Mfun}) which are
-## given to @code{pcg}.  See the examples below for further details.
+## a proper way to any of the functions (@var{A} or @var{m1} or
+## @var{m2}) which are passed to @code{pcg}.
+## See the examples below for further details.
 ##
-## The output arguments are
+## The output arguments are:
 ##
 ## @itemize
 ## @item
 ## @var{x} is the computed approximation to the solution of
-## @w{@code{@var{A}*@var{x} = @var{b}}}.
+## @w{@code{@var{A} * @var{x} = @var{b}}}. If the algorithm did not converge,
+## then @var{x} is the iterated which has the minimum residual.
 ##
 ## @item
-## @var{flag} reports on convergence.
-##
+## @var{flag} reports on the convergence:
 ## @itemize
-## @item 0 : The algorithm converged and the tolerance criterion given by
-## @var{tol} is satisfied.
-##
-## @item 1 : The maximum number of iterations was reached (specified by
-## @var{maxit}).
-##
-## @item 3 : The (preconditioned) matrix was found @strong{not} to be positive
-## definite.
+## @item 0: The algorithm converged at the prescribed tolerance.
+## @item 1: The algorithm did not converge and it reached the maximum
+## number of iterations.
+## @item 2: The preconditioner matrix is singular.
+## @item 3: The algorithm stagnated, i.e. the absolute value of the
+## difference between
+## the actual iteration @var{x} and the previous is less than
+## @code{@var{eps} * norm (@var{x},2)}.
+## @item 4: The algorithm detects that the input (preconditioned) matrix is not
+## HPD.
 ## @end itemize
 ##
 ## @item
@@ -102,43 +113,50 @@
 ## measured in the Euclidean norm.
 ##
 ## @item
-## @var{iter} is the actual number of iterations performed.
+## @var{iter} indicates the iteration of @var{x} which it was
+## computed. Since the output @var{x} corresponds to the minimal
+## residual solution, the total number of iterations that
+## the method performed is given by @code{length(resvec) - 1}.
 ##
 ## @item
 ## @var{resvec} describes the convergence history of the method.
-## The first column, @code{@var{resvec}(i,1)}, is the Euclidean norm of the
-## residual, and the second column, @code{@var{resvec}(i,2)} is the
-## preconditioned residual norm, after the (@var{i}-1)-th iteration,
-## @code{@var{i} = 1, 2, @dots{}, @var{iter}+1}.  The preconditioned residual
-## norm is defined as
-## @w{@code{norm (@var{r}) ^ 2 = @var{r}' * (@var{M} \ @var{r})}} where
-## @w{@code{@var{r} = @var{b} - @var{A}*@var{x}}}, see also the
-## description of @var{M}.  If @var{eigest} is not required, only the first
-## column of @var{resvec} (Euclidean norm of residual) is returned.
+## @code{@var{resvec} (@var{i}, 1)} is the Euclidean norm of the residual, and
+## @code{@var{resvec} (@var{i}, 2)} is the preconditioned residual
+## norm, after the
+## (@var{i}-1)-th iteration, @code{@var{i} = 1, 2, @dots{}, @var{iter}+1}.
+## The preconditioned residual norm is defined as
+## @code{@var{r}' * (@var{m} \ @var{r})} where
+## @code{@var{r} = @var{b} - @var{A} * @var{x}}, see also the
+## description of @var{m}.  If @var{eigest} is not required, only
+## @code{@var{resvec} (:, 1)} is returned.
 ##
 ## @item
 ## @var{eigest} returns the estimate for the smallest @code{@var{eigest}(1)}
 ## and largest @code{@var{eigest}(2)} eigenvalues of the preconditioned matrix
-## @w{@code{@var{P} = @var{M} \ @var{A}}}.  In particular, if no
+## @w{@code{@var{P} = @var{m} \ @var{A}}}.  In particular, if no
 ## preconditioning is used, the estimates for the extreme eigenvalues of
 ## @var{A} are returned.  @code{@var{eigest}(1)} is an overestimate and
 ## @code{@var{eigest}(2)} is an underestimate, so that
 ## @code{@var{eigest}(2) / @var{eigest}(1)} is a lower bound for
-## the condition number of @var{P} (@code{cond (@var{P}, 2)}).  The method
-## which computes @var{eigest} works only for symmetric positive definite
-## matrices @var{A} and @var{M}, and the user is responsible for verifying this
-## assumption.
+## @code{cond (@var{P}, 2)}, which nevertheless in the limit should
+## theoretically be equal to the actual value of the condition number.
 ## @end itemize
 ##
-## Consider a trivial problem with a diagonal matrix (which naturally has high
-## sparsity):
+##
+## Let us consider a trivial problem with a tridiagonal matrix
 ##
 ## @example
 ## @group
 ## n = 10;
-## A = diag (sparse (1:n));
-## b = rand (n, 1);
-## [L, U, P] = ilu (A, struct ("droptol", 1e-3));
+## A = toeplitz (sparse ([1, 1], [1, 2], [2, 1], 1, n));
+## b = A * ones (n, 1);
+## M1 = ichol (A); # in this tridiagonal case it corresponds to chol (A)'
+## M2 = M1';
+## M = M1 * M2;
+## Afun = @@(x) A * x;
+## Mfun = @@(x) M \ x;
+## M1fun = @@(x) M1 \ x;
+## M2fun = @@(x) M2 \ x;
 ## @end group
 ## @end example
 ##
@@ -149,62 +167,68 @@
 ## @end example
 ##
 ## @sc{Example 2:} @code{pcg} with a function which computes
-## @code{@var{A}*@var{x}}
+## @code{@var{A} * @var{x}}
+##
+## @example
+## x = pcg (Afun, b)
+## @end example
+##
+## @sc{Example 3:} @code{pcg} with a preconditioner matrix @var{M}
+##
+## @example
+## x = pcg (A, b, 1e-06, 100, M)
+## @end example
+##
+## @sc{Example 4:} @code{pcg} with a function as preconditioner
+##
+## @example
+## x = pcg (Afun, b, 1e-6, 100, Mfun)
+## @end example
+##
+## @sc{Example 5:} @code{pcg} with preconditioner matrices @var{M1}
+## and @var{M2}
+##
+## @example
+## x = pcg (A, b, 1e-6, 100, M1, M2)
+## @end example
+##
+## @sc{Example 6:} @code{pcg} with functions as preconditioners
+##
+## @example
+## x = pcg (Afun, b, 1e-6, 100, M1fun, M2fun)
+## @end example
+##
+## @sc{Example 7:} @code{pcg} with as input a function requiring an argument
 ##
 ## @example
 ## @group
-## function y = apply_A (x)
-##   y = [1:N]' .* x;
-## endfunction
-##
-## x = pcg ("apply_A", b)
+##   function y = Ap (A, x, p) # compute A^p * x
+##      y = x;
+##      for i = 1:p
+##        y = A * y;
+##      endfor
+##   endfunction
+## Apfun = @@(x, p) Ap (A, x, p);
+## x = pcg (Apfun, b, [], [], [], [], [], 2);
 ## @end group
 ## @end example
 ##
-## @sc{Example 3:} @code{pcg} with a preconditioner: @var{L} * @var{U}
-##
-## @example
-## x = pcg (A, b, 1e-6, 500, L*U)
-## @end example
-##
-## @sc{Example 4:} @code{pcg} with a preconditioner: @var{L}, @var{U}.
-## Faster than @sc{Example 3} because lower and upper triangular matrices are
-## easier to invert
-##
-## @example
-## x = pcg (A, b, 1e-6, 500, L, U)
-## @end example
-##
-## @sc{Example 5:} Preconditioned iteration, with full diagnostics.  The
-## preconditioner (quite strange, because even the original matrix @var{A} is
-## trivial) is defined as a function
+## @sc{Example 8:} explicit example to show that @code{pcg} uses a
+## split preconditioner
 ##
 ## @example
 ## @group
-## function y = apply_M (x)
-##   k = floor (length (x) - 2);
-##   y = x;
-##   y(1:k) = x(1:k) ./ [1:k]';
-## endfunction
-##
-## [x, flag, relres, iter, resvec, eigest] = ...
-##                    pcg (A, b, [], [], "apply_M");
-## semilogy (1:iter+1, resvec);
-## @end group
-## @end example
+## M1 = ichol (A + 0.1 * eye (n)); # factorization of A perturbed
+## M2 = M1';
+## M = M1 * M2;
 ##
-## @sc{Example 6:} A preconditioner which depends on a parameter @var{k}.
+## ## reference solution computed by pcg after two iterations
+## [x_ref, fl] = pcg (A, b, [], 2, M)
 ##
-## @example
-## @group
-## function y = apply_M (x, varargin)
-##   K = varargin@{1@};
-##   y = x;
-##   y(1:K) = x(1:K) ./ [1:K]';
-## endfunction
+## ## split preconditioning
+## [y, fl] = pcg ((M1 \ A) / M2, M1 \ b, [], 2)
+## x = M2 \ y # compare x and x_ref
 ##
-## [x, flag, relres, iter, resvec, eigest] = ...
-##      pcg (A, b, [], [], "apply_m", [], [], 3)
 ## @end group
 ## @end example
 ##
@@ -222,321 +246,276 @@
 ## @url{http://www-users.cs.umn.edu/~saad/books.html}
 ## @end enumerate
 ##
-## @seealso{sparse, pcr}
+## @seealso{sparse, pcr, gmres, bicg, bicgstab, cgs}
 ## @end deftypefn

 ## Author: Piotr Krzyzanowski <piotr.krzyzanowski@mimuw.edu.pl>
 ## Modified by: Vittoria Rezzonico <vittoria.rezzonico@epfl.ch>
 ##  - Add the ability to provide the pre-conditioner as two separate matrices

-function [x, flag, relres, iter, resvec, eigest] = pcg (A, b, tol, maxit, M1, M2, x0, varargin)

-  ## FIXME: Would be good to have actual validation of some inputs, rather
-  ##        than just passing them through to internal variables.
-
-  if (nargin < 7 || isempty (x0))
-    x = zeros (size (b));
-  else
-    x = x0;
-  endif
+function [x_min, flag, relres, iter_min, resvec, eigest] =...
+         pcg (A, b, tol = [], maxit = [], M1 = [], M2 = [], x0 = [], varargin)

-  if (nargin < 5 || isempty (M1))
-     exist_m1 = false;
-  else
-     exist_m1 = true;
-  endif
+  ## Insert the default input (if necessary)
+  [tol, maxit, x0] = __default__input__ ({1e-6, min(rows (b), 20),...
+                                          zeros(size (b))}, tol, maxit, x0);

-  if (nargin < 6 || isempty (M2))
-     exist_m2 = false;
-  else
-     exist_m2 = true;
+  if (tol >= 1)
+    warning ("Input tol is bigger than 1. \n Try to use a smaller tolerance.");
+  elseif (tol <= eps / 2)
+    warning ("Input tol may not be achievable by pcg. \n Try to use a bigger tolerance");
   endif

-  if (nargin < 4 || isempty (maxit))
-    maxit = min (rows (b), 20);
-  endif
+  ## Check if the input data A,b,m1,m2 are consistent (i.e. if they are
+  ## matrix or function handle)
+
+  [Afun, M1fun, M2fun] = __alltohandles__ (A, b, M1, M2, "pcg");
+
+  maxit += 2;
+  n_arg_out = nargout;

-  ## Allow extra iterations for function eval done outside main loop.
-  maxit += 2;
-
-  if (nargin < 3 || isempty (tol))
-    tol = 1e-6;
+  ## Set Initial data
+  b_norm = norm (b);
+  if (b_norm == 0)
+     if (n_arg_out < 2)
+       printf("The right hand side vector is all zero so pcg \n");
+       printf ("returned an all zero solution without iterating.\n");
+     endif
+     x_min = b;
+     flag = 0;
+     relres = 0;
+     resvec = 0;
+     iter_min = 0;
+     eigest = [NaN, NaN];
+     return
   endif

-  preconditioned_residual_out = false;
-  if (nargout > 5)
-    preconditioned_residual_out = true;
-    T = zeros (maxit, maxit);
-  endif
+  x = x_pr = x_min = x0;

-  ## Assume A is positive definite, until proved otherwise.
-  matrix_positive_definite = true;
+  ## x_pr (x previous) needs to check the stagnation
+  ## x_min needs to save the iterated with minimum residual

+  r = b - feval (Afun, x, varargin{:});
+  iter = 2;
+  iter_min = 0;
+  flag = 1;
+  resvec = zeros (maxit + 1, 2);
+  resvec(1, 1) = norm (r);
   p = zeros (size (b));
-  oldtau = 1;
-  if (isnumeric (A))
-    ## A is a matrix.
-    r = b - A*x;
+  alpha = old_tau = 1;
+
+  if (n_arg_out > 5)
+    T = zeros (maxit, maxit);
   else
-    ## A should be a function.
-    r = b - feval (A, x, varargin{:});
+    T = [];
   endif

-  b_norm = norm (b);
-  resvec(1,1) = norm (r);
-  alpha = 1;
-  iter = 2;
-
-  while (resvec(iter-1,1) > (tol * b_norm) && iter < maxit)
-    if (exist_m1)
-      if (isnumeric (M1))
-        y = M1 \ r;
-      else
-        y = feval (M1, r, varargin{:});
-      endif
+  while (resvec(iter-1,1) > tol * b_norm && iter < maxit)
+    if (iter == 2) # Check whether M1 or M2 are singular
+      try
+        warning ("error","Octave:singular-matrix","local")
+        z = feval (M1fun, r, varargin{:});
+        z = feval (M2fun, z, varargin{:});
+      catch
+        flag = 2;
+        break;
+      end_try_catch
     else
-      y = r;
+      z = feval (M1fun, r, varargin{:});
+      z = feval (M2fun, z, varargin{:});
     endif
-    if (exist_m2)
-      if (isnumeric (M2))
-        z = M2 \ y;
-      else
-        z = feval (M2, y, varargin{:});
-      endif
-    else
-      z = y;
-    endif
+
     tau = z' * r;
-    resvec(iter-1,2) = sqrt (tau);
-    beta = tau / oldtau;
-    oldtau = tau;
+    resvec(iter - 1, 2) = sqrt (tau);
+    beta = tau / old_tau;
+    old_tau = tau;
     p = z + beta * p;
-    if (isnumeric (A))
-      ## A is a matrix.
-      w = A * p;
-    else
-      ## A should be a function.
-      w = feval (A, p, varargin{:});
+    w = feval (Afun, p, varargin{:});
+
+    ## Needed only for eigest.
+
+    old_alpha = alpha;
+    den = p' * w;
+    alpha = tau / den;
+
+    ## Check if alpha is negative and/or if it has a consistent
+    ## imaginary part: if yes then A probably is not positive definite
+    if ((abs (imag (tau)) >= abs (real (tau)) * tol) || ...
+        real (tau) <= 0 || ...
+        (abs (imag (den)) >= abs (real (den)) * tol) || ...
+        (real (den) <= 0))
+      flag = 4;
+      break;
     endif
-    oldalpha = alpha;    # Needed only for eigest.
-    alpha = tau / (p'*w);
-    if (alpha <= 0.0)
-      ## Negative matrix.
-      matrix_positive_definite = false;
-    endif
+
     x += alpha * p;
     r -= alpha * w;
-    if (preconditioned_residual_out && iter > 2)
-      T(iter-1:iter, iter-1:iter) += [1 sqrt(beta); sqrt(beta) beta]./oldalpha;
+    resvec(iter, 1) = norm (r);
+    ## Chek if the iterated has minimum residual
+    if (resvec (iter,1) <= resvec (iter_min + 1,1))
+      x_min = x;
+      iter_min = iter - 1;
     endif
-    resvec(iter,1) = norm (r);
+    if (n_arg_out > 5 && iter > 2)
+      T(iter-1:iter, iter-1:iter) = T(iter-1:iter, iter-1:iter) + ...
+                                    [1, sqrt(beta); sqrt(beta), beta] ./ ...
+                                    old_alpha;
+    endif
     iter += 1;
+    if (norm (x - x_pr) <= eps * norm (x)) # Check the stagnation
+      flag = 3;
+      break;
+    endif
+    x_pr = x;
   endwhile

-  if (preconditioned_residual_out)
-    if (matrix_positive_definite)
+  if (n_arg_out > 5)
+  ## Apply the preconditioner once more and finish with the precond
+  ## residual.
+    z = feval (M1fun, r, varargin{:});
+    z = feval (M2fun, z, varargin{:});
+  endif
+
+  ## (Eventually) computes the eigenvalue of inv(m2)*inv(m1)*A
+  if (n_arg_out > 5)
+    if (flag != 4)
       if (iter > 3)
         T = T(2:iter-2,2:iter-2);
         l = eig (T);
         eigest = [min(l), max(l)];
       else
+        eigest = [NaN, NaN];
         warning ("pcg: eigenvalue estimate failed: iteration converged too fast");
-        eigest = [NaN, NaN];
       endif
     else
       eigest = [NaN, NaN];
-    endif
-
-    ## Apply the preconditioner once more and finish with the precond residual.
-    if (exist_m1)
-      if (isnumeric (M1))
-        y = M1 \ r;
-      else
-        y = feval (M1, r, varargin{:});
-      endif
-    else
-      y = r;
+      warning ('pcg: eigenvalue estimate failed: matrix not positive definite?')
     endif
-    if (exist_m2)
-      if (isnumeric (M2))
-        z = M2 \ y;
-      else
-        z = feval (M2, y, varargin{:});
-      endif
-    else
-      z = y;
-    endif
+    resvec(iter - 1, 2) = sqrt (r' * z);
+    resvec  = resvec (1:(iter-1), :);
+  else
+    eigest = [NaN, NaN];
+    resvec = resvec(1:(iter-1),1);
+  endif
+
+  ## Set the last variables

-    resvec(iter-1,2) = sqrt (r' * z);
+  if (flag == 2)
+    relres = 1;
+  elseif (resvec (1, 1) == 0)
+    relres = 0;
   else
-    resvec = resvec(:,1);
+    relres = resvec(iter_min+1, 1) ./ resvec(1, 1);
+  endif
+
+  iter -= 2; # compatibility
+
+  ## Set the flag in the proper way if flag not 3, 4 or 2
+  if (flag == 2)
+    flag = 2;
+  elseif (flag == 1) && (relres <= tol)
+    flag = 0;
   endif

-  flag = 0;
-  relres = resvec(iter-1,1) ./ resvec(1,1);
-  iter -= 2;
-  if (iter >= maxit - 2)
-    flag = 1;
-    if (nargout < 2)
-      warning ("pcg: maximum number of iterations (%d) reached\n", iter);
-      warning ("pcg: the initial residual norm was reduced %g times.\n",
-               1.0 / relres);
-    endif
-  elseif (nargout < 2)
-    fprintf (stderr, "pcg: converged in %d iterations.\n", iter);
-    fprintf (stderr, "pcg: the initial residual norm was reduced %g times.\n",
-             1.0 / relres);
+  if (n_arg_out < 2)
+    switch (flag)
+      case {0}
+        printf ("pcg converged at iteration %d ", iter_min);
+        printf ("with relative residual %d\n", relres);
+      case {1}
+        printf ("pcg stopped at iteration %d ", iter+1);
+        printf ("without converging to the desired tolerance %d ", tol);
+        printf ("because the maximum number of iteration was reached, \n");
+        printf ("The iterated returned (number %d) ",iter_min);
+        printf ("has relative residual %d \n", relres);
+      case {2}
+        printf ("pcg stopped at iteration %d ", iter+1)
+        printf ("without converging to the desired tolerance %d ", tol);
+        printf ("because the preconditioned matrix is singular.\n");
+        printf ("The iterated returned (number %d) ", iter_min);
+        printf ("has relative residual %d \n", relres);
+      case {3}
+        printf ("pcg stopped at iteration %d ", iter+1);
+        printf ("without converging to the desired tolerance %d ", tol);
+        printf ("because of stagnation. \n");
+        printf ("The iterated returned (number %d) ", iter_min);
+        printf ("has relative residual %d.\n", relres);
+      case {4}
+        printf ("pcg stopped at iteration %d ", iter + 1);
+        printf ("without converging to the desired tolerance %d ",tol);
+        printf ("because the (preconditioned) matrix is not positive definite. \n");
+        printf ("The iterate returned (number %d) ", iter_min);
+        printf ("has relative residual %d \n", relres);
+    endswitch
   endif
-
-  if (! matrix_positive_definite)
-    flag = 3;
-    if (nargout < 2)
-      warning ("pcg: matrix A was not positive definite\n");
-    endif
-  endif
-
 endfunction

-
-%!demo
-%! ## Simplest usage of pcg (see also "help pcg")
-%!
-%! N = 10;
-%! A = diag ([1:N]);
-%! b = rand (N, 1);
-%! y = A \ b;  # y is the true solution
-%! x = pcg (A, b);
-%! printf ("The solution relative error is %g\n", norm (x - y) / norm (y));
-%!
-%! ## Don't be alarmed if pcg issues some warning messages in this example.
-
-%!demo
-%! ## Full output from pcg, except for the eigenvalue estimates.
-%! ## Use the output to plot the convergence history.
-%!
-%! N = 10;
-%! A = diag ([1:N]);
-%! b = rand (N, 1);
-%! X = A \ b;  # X is the true solution
-%! [x, flag, relres, iter, resvec] = pcg (A, b);
-%! printf ("The solution relative error is %g\n", norm (x - X) / norm (X));
-%! semilogy ([0:iter], resvec ./ resvec(1), "o-g");
-%! title ("Convergence history");
-%! set (gca, "xtick", [0:iter]);
-%! xlabel ("Iteration");  ylabel ("log( ||b-Ax|| / ||b|| )");
-%! legend ("relative residual");
-
-%!demo
-%! ## Full output from pcg, including the eigenvalue estimates.
-%! ## Hilbert matrix is extremely ill-conditioned, so pcg WILL have problems.
-%!
-%! N = 10;
-%! A = hilb (N);
-%! b = rand (N, 1);
-%! X = A \ b;  # X is the true solution
-%! M = diag (diag (A));  # Jacobi preconditioner
-%! [x, flag, relres, iter, resvec, eigest] = pcg (A, b, [], 200, M);
-%! printf ("The solution relative error is %g\n", norm (x - X) / norm (X));
-%! printf ("Condition number estimate is %g\n", eigest(2) / eigest(1));
-%! printf ("Actual condition number is   %g\n", cond (A));
-%! semilogy ([0:iter], resvec, ["o-g";"+-r"]);
-%! title ("Convergence history");
-%! xlabel ("Iteration");  ylabel ("log( ||b-Ax|| )");
-%! legend ("absolute residual", "absolute preconditioned residual");
-
-%!demo
-%! ## Full output from pcg, including the eigenvalue estimates.
-%! ## We use the 1-D Laplacian matrix for A, and cond (A) = O(N^2)
-%! ## which is the reason we need some preconditioner; here we take
-%! ## a very simple and not powerful Jacobi preconditioner,
-%! ## which is the diagonal of A.
-%!
-%! N = 100;
-%! ## Form 1-D Laplacian matrix (2 on main diagonal, -1 on supra/sub-diagonals)
-%! A = diag (repmat (2, [100, 1])) ...
-%!     + diag (repmat (-1, [99, 1]), -1) ...
-%!     + diag (repmat (-1, [99, 1]), +1);
-%! b = rand (N, 1);
-%! X = A \ b;  # X is the true solution
-%! maxit = 80;
-%! printf ("System condition number is %g\n", cond (A));
-%! ## No preconditioner: the convergence is very slow!
-%!
-%! [x, flag, relres, iter, resvec, eigest] = pcg (A, b, [], maxit);
-%! printf ("System condition number estimate is %g\n", eigest(2) / eigest(1));
-%! semilogy ([0:iter], resvec(:,1), "o-g");
-%! ylim ([1e-6, 1e3])
-%! title ("Convergence history");
-%! xlabel ("Iteration");  ylabel ("log( ||b-Ax|| )");
-%! legend ("NO preconditioning: absolute residual", ...
-%!         "location", "north");
-%! pause (1.5);
-%!
-%! ## Test Jacobi preconditioner: it does not help much.
-%!
-%! M = diag (diag (A));  # Jacobi preconditioner
-%! [x, flag, relres, iter, resvec, eigest] = pcg (A, b, [], maxit, M);
-%! printf ("JACOBI preconditioned system condition number estimate is %g\n", eigest(2) / eigest(1));
-%! hold on;
-%! semilogy ([0:iter], resvec(:,1), "x-r");
-%! legend ("NO preconditioning: absolute residual", ...
-%!         "JACOBI preconditioner: absolute residual",
-%!         "location", "north");
-%! pause (1.5);
-%!
-%! ## Test non-overlapping block Jacobi preconditioner: it will help much!
-%!
-%! M = zeros (N, N);
-%! k = 4;
-%! for i = 1 : k : N
-%!   M(i:i+k-1, i:i+k-1) = A(i:i+k-1, i:i+k-1);
-%! endfor
-%! [x, flag, relres, iter, resvec, eigest] = pcg (A, b, [], maxit, M);
-%! printf ("BLOCK JACOBI preconditioned system condition number estimate is %g\n", eigest(2) / eigest(1));
-%! semilogy ([0:iter], resvec(:,1), "s-b");
-%! legend ("NO preconditioning: absolute residual", ...
-%!         "JACOBI preconditioner: absolute residual", ...
-%!         "BLOCK JACOBI preconditioner: absolute residual",
-%!         "location", "north");
-%! hold off;
+%!test
+%! ## Check that all type of inputs work
+%! A = toeplitz (sparse ([2, 1 ,0, 0, 0]));
+%! b = A * ones (5, 1);
+%! M1 = diag (sqrt (diag (A)));
+%! M2 = M1; # M1 * M2 is the Jacobi preconditioner
+%! Afun = @(z) A*z;
+%! M1_fun = @(z) M1 \ z;
+%! M2_fun = @(z) M2 \ z;
+%! [x, flag, ~, iter] = pcg (A,b);
+%! assert (flag, 0)
+%! [x, flag, ~ , iter] = pcg (A, b, [], [], M1 * M2);
+%! assert (flag, 0)
+%! [x, flag, ~ , iter] = pcg (A, b, [], [], M1, M2);
+%! assert (flag, 0)
+%! [x, flag] = pcg (A, b, [], [], M1_fun, M2_fun);
+%! assert (flag, 0)
+%! [x, flag] = pcg (A, b,[],[], M1_fun, M2);
+%! assert (flag, 0)
+%! [x, flag] = pcg (A, b,[],[], M1, M2_fun);
+%! assert (flag, 0)
+%! [x, flag] = pcg (Afun, b);
+%! assert (flag, 0)
+%! [x, flag] = pcg (Afun, b,[],[], M1 * M2);
+%! assert (flag, 0)
+%! [x, flag] = pcg (Afun, b,[],[], M1, M2);
+%! assert (flag, 0)
+%! [x, flag] = pcg (Afun, b,[],[], M1_fun, M2);
+%! assert (flag, 0)
+%! [x, flag] = pcg (Afun, b,[],[], M1, M2_fun);
+%! assert (flag, 0)
+%! [x, flag] = pcg (Afun, b,[],[], M1_fun, M2_fun);
+%! assert (flag, 0)

 %!test
 %! ## solve small diagonal system
 %!
 %! N = 10;
-%! A = diag ([1:N]);
-%! b = rand (N, 1);
-%! [x, flag] = pcg (A, b, [], 2*N);
-%! assert (norm (A*x - b) / norm (b), 0, 1e-6);
+%! A = diag ([1:N]); b = rand (N, 1);
+%! X = A \ b;  # X is the true solution
+%! [x, flag] = pcg (A, b, [], N+1);
+%! assert (norm (x - X) / norm (X), 0, 1e-10);
 %! assert (flag, 0);

 %!test
-%! ## solve small indefinite diagonal system
-%! ## Despite A being indefinite, the iteration continues and converges.
+%! ## A not positive definite
 %! ## The indefiniteness of A is detected.
 %!
 %! N = 10;
-%! A = diag([1:N] .* (-ones(1, N) .^ 2));
-%! b = rand (N, 1);
-%! [x, flag] = pcg (A, b, [], 2*N);
-%! assert (norm (A*x - b) / norm (b), 0, 1e-6);
-%! assert (flag, 3);
+%! A = -diag ([1:N]); b = sum (A, 2);
+%! [x, flag] = pcg (A, b, [], N + 1);
+%! assert (flag, 4)
+

 %!test
 %! ## solve tridiagonal system, do not converge in default 20 iterations
-%!
 %! N = 100;
 %! A = zeros (N, N);
-%! ## Form 1-D Laplacian matrix (2 on main diagonal, -1 on supra/sub-diagonals)
-%! A = diag (repmat (2, [100, 1])) ...
-%!     + diag (repmat (-1, [99, 1]), -1) ...
-%!     + diag (repmat (-1, [99, 1]), +1);
+%! for i = 1 : N - 1 # form 1-D Laplacian matrix
+%!   A(i:i+1, i:i+1) = [2 -1; -1 2];
+%! endfor
 %! b = ones (N, 1);
-%! X = A \ b;  # X is the true solution
 %! [x, flag, relres, iter, resvec, eigest] = pcg (A, b, 1e-12);
 %! assert (flag);
-%! assert (relres > 1.0);
-%! assert (iter, 20); # should perform max allowable default number of iterations
+%! assert (relres >= 1.0);

 %!warning <iteration converged too fast>
 %! ## solve tridiagonal system with "perfect" preconditioner which converges
@@ -544,13 +523,151 @@
 %!
 %! N = 100;
 %! A = zeros (N, N);
-%! ## Form 1-D Laplacian matrix (2 on main diagonal, -1 on supra/sub-diagonals)
-%! A = diag (repmat (2, [100, 1])) ...
-%!     + diag (repmat (-1, [99, 1]), -1) ...
-%!     + diag (repmat (-1, [99, 1]), +1);
+%! for i = 1 : N - 1  # form 1-D Laplacian matrix
+%!   A(i:i+1, i:i+1) = [2 -1; -1 2];
+%! endfor
+%! b = ones (N, 1);
+%! X = A \ b;  # X is the true solution
+%! [x, flag, relres, iter, resvec, eigest] = pcg (A, b, [], [], A, [], b);
+%! assert (norm (x - X) / norm (X), 0, 1e-6);
+%! assert (flag, 0);
+%! assert (iter, 1); # should converge in one iteration
+%! assert (isnan (eigest), isnan ([NaN, NaN]));
+
+%!test
+%! ## pcg detect a non-Hermitian matrix, with a considerable imaginary part
+%! ## With this example, Matlab doesn't recognize the wrong type of matrix and
+%! ## makes iterations until it reaches maxit
+%! N = 10;
+%! A = diag (1:N) + 1i * 1e-04;
+%! b = ones (N, 1);
+%! [x,flag] = pcg (A, b, []);
+%! assert (flag, 4)
+
+%!test
+%! ## The imaginary part is not influent (it is too small), so pcg doesn't stop
+%! N = 10;
+%! A = diag (1:N) + 1i * 1e-10;
 %! b = ones (N, 1);
-%! [x, flag, relres, iter, resvec, eigest] = pcg (A, b, [], [], A, [], b);
-%! assert (norm (A*x - b) / norm (b), 0, 1e-6);
-%! assert (flag, 0);
-%! assert (iter, 1);  # should converge in one iteration
-%! assert (isnan (eigest), isnan ([NaN, NaN]));
+%! [x,flag] = pcg (A, b, [], N+1);
+%! assert (flag, 0)
+%! assert (x, A \ b, -1e-6)
+
+%!test
+%! ## pcg solves linear system with A Hermitian positive definite
+%! N = 20;
+%! A = toeplitz (sparse ([4, 1, zeros(1, 18)])) + ...
+%! 1i * toeplitz (sparse ([0, 1, zeros(1, 18)]), ...
+%! sparse ([0, -1, zeros(1,18)]));
+%! b = A * ones (N, 1);
+%! Hermitian_A = ishermitian (A);
+%! [x,flag] = pcg (A, b, [], 2*N);
+%! assert (Hermitian_A, true)
+%! assert (flag, 0)
+%! assert (x, ones (N, 1), -1e-4)
+
+%!test
+%! ## pcg solves preconditioned linear system with A HPD
+%! N = 20;
+%! A = toeplitz (sparse ([4, 1, zeros(1, 18)])) + ...
+%! 1i * toeplitz (sparse ([0, 1, zeros(1, 18)]), ...
+%! sparse ([0, -1, zeros(1,18)]));
+%! b = A * ones (N, 1);
+%! M2 = chol (A + 0.1 * eye (N)); # factor of a perturbed matrix
+%! M = M2' * M2;
+%! Hermitian_A = ishermitian (A);
+%! Hermitian_M = ishermitian (M);
+%! [x,flag] = pcg (A, b, [], 2*N, M);
+%! assert (Hermitian_A, true)
+%! assert (Hermitian_M, true)
+%! assert (flag, 0)
+%! assert (x, ones (N, 1), -1e-4)
+
+%!test
+%! ## pcg recognizes that the preconditioner matrix is singular
+%! N = 3;
+%! A = toeplitz ([2, 1, 0]);
+%! M = [1 0 0; 0 1 0; 0 0 0]; # the last rows is zero
+%! [x, flag] = pcg (A, ones(3, 1), [], [], M);
+%! assert (flag, 2)
+
+%!test
+%! A = rand (4);
+%! A = A' * A;
+%! [x, flag] = pcg (A, zeros (4, 1), [], [], [], [], ones (4, 1));
+%! assert (x, zeros (4, 1))
+
+%!test
+%! A = single (1);
+%! b = 1;
+%! [x, flag] = pcg (A, b);
+%! assert (class (x), "single")
+
+%!test
+%! A = 1;
+%! b = single (1);
+%! [x, flag] = pcg (A, b);
+%! assert (class (x), "single")
+
+%!test
+%! A = single (1);
+%! b = single (1);
+%! [x, flag] = pcg (A, b);
+%! assert (class (x), "single")
+
+%!test
+%!function y = Afun (x)
+%!   A = toeplitz ([2, 1, 0, 0]);
+%!   y = A * x;
+%!endfunction
+%! [x, flag] = pcg ("Afun", [3; 4; 4; 3]);
+%! assert (x, ones(4, 1), 1e-6)
+
+%!test # unpreconditioned residual
+%! A = toeplitz (sparse ([4, 1, 0, 0, 0]));
+%! b = sum (A, 2);
+%! M = toeplitz (sparse ([2, 1, 0, 0, 0]));
+%! [x, flag, relres] = pcg (A, b, [], 2, M);
+%! assert (relres, norm ((b - A * x)) / norm (b), 8 * eps)
+
+%!demo # simplest use
+%! n = 10;
+%! A = toeplitz (sparse ([1, 1], [1, 2], [2, 1], 1, n));
+%! b = A * ones (n, 1);
+%! M1 = ichol (A); # in this tridiagonal case it corresponds to chol (A)'
+%! M2 = M1';
+%! M = M1 * M2;
+%! x = pcg (A, b);
+%! Afun = @(x) A * x;
+%! x = pcg (Afun, b);
+%! x = pcg (A, b, 1e-6, 100, M);
+%! x = pcg (A, b, 1e-6, 100, M1, M2);
+%! Mfun = @(x) M \ x;
+%! x = pcg (Afun, b, 1e-6, 100, Mfun);
+%! M1fun = @(x) M1 \ x;
+%! M2fun = @(x) M2 \ x;
+%! x = pcg (Afun, b, 1e-6, 100, M1fun, M2fun);
+%! function y = Ap (A, x, p) # compute A^p * x
+%!    y = x;
+%!    for i = 1:p
+%!      y = A * y;
+%!    endfor
+%!  endfunction
+%! Afun = @(x, p) Ap (A, x, p);
+%! x = pcg (Afun, b, [], [], [], [], [], 2); # solution of A^2 * x = b
+
+%!demo
+%! n = 10;
+%! A = toeplitz (sparse ([1, 1], [1, 2], [2, 1], 1, n));
+%! b = A * ones (n, 1);
+%! M1 = ichol (A + 0.1 * eye (n)); # factorization of A perturbed
+%! M2 = M1';
+%! M = M1 * M2;
+%!
+%! ## reference solution computed by pcg after two iterations
+%! [x_ref, fl] = pcg (A, b, [], 2, M);
+%! x_ref
+%!
+%! ## split preconditioning
+%! [y, fl] = pcg ((M1 \ A) / M2, M1 \ b, [], 2);
+%! x = M2 \ y # compare x and x_ref
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/sparse/private/__alltohandles__.m	Sun Mar 11 07:29:04 2018 +0900
@@ -0,0 +1,139 @@
+## Copyright (C) 2016 Cristiano Dorigo, Octave Arena
+##
+## This file is part of Octave.
+##
+## Octave is free software; you can redistribute it and/or modify it
+## under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 3 of the License, or
+## (at your option) any later version.
+##
+## Octave is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with Octave; see the file COPYING.  If not,
+## see <http://www.gnu.org/licenses/>.
+
+## -*- texinfo -*-
+## @deftypefn  {} {[@var{Afun}, @var{M1fun}, @var{M2fun}] =} __alltohandles__ (@var{A}, @var{b}, @var{M1}, @var{M2}, @var{solver_name})
+##
+## Check if the parameters @var{A} (matrix of our linear system), @var{b}
+## (right hand side vector), @var{M1}, @var{M2} (preconditioner matrices) are
+## really matrices or functions handle, summarizing if they are void or not.
+##
+## The input parameters are:
+##
+## @itemize
+## @item @var{A} is the matrix of the linear system.
+##
+## @item @var{b} is the right hand side vector.
+##
+## @item @var{M1}, @var{M2} preconditioners. They can be [].
+##
+## @item @var{solver_name} is the name of the solver as string.
+##
+## @end itemize
+##
+## The output parameters are:
+## @itemize
+##
+## @item @var{Afun}, @var{M1fun}, @var{M2fun} are the corresponding
+## function handles.
+##
+## @end itemize
+## @end deftypefn
+
+function [Afun, M1fun, M2fun] = __alltohandles__ (A, b, M1, M2, solver_name)
+
+  A_is_numeric = false;
+  M1_is_numeric = false;
+  M2_is_numeric = false;
+
+  ## Check A and set its type
+  if (isa (A, "function_handle"))
+     Afun = A;
+  elseif (ischar (A))
+    Afun = str2func (A);
+  elseif (!isnumeric (A) || !issquare (A))
+    error([solver_name, ": A must be a square matrix or a function handle"])
+  else
+    A_is_numeric = true;
+    if (size (A, 2) != size (b, 1))
+      error ("__alltohandles__: dimension of b is not consistent with A")
+    endif
+  endif
+
+  ## Check M1 and sets its type
+  if (isempty (M1)) # M1 empty, set to identity function
+      M1fun = @(x) x;
+  else # M1 not empty
+    if (isa (M1, "function_handle"))
+      M1fun = M1;
+    elseif (ischar (M1))
+      M1fun = str2func (M1);
+    elseif (!isnumeric (M1) || !issquare (M1))
+      error([solver_name, ": M1 must be a square matrix or a function handle"])
+    else
+      M1_is_numeric = true;
+    endif
+  endif
+
+  if (isempty (M2)) # M2 empty, then I set is to the identity function
+    M2fun = @(x) x;
+  else # M2 not empty
+    if (isa (M2, "function_handle"))
+      M2fun = M2;
+    elseif (ischar (M2))
+      M2fun = str2func (M2);
+    elseif (!isnumeric (M2) || !issquare (M2))
+      error([solver_name, ": M2 must be a square matrix or a function handle"])
+    else
+      M2_is_numeric = true;
+    endif
+  endif
+
+  switch solver_name
+    case {"pcg", "gmres", "bicgstab", "cgs", "tfqmr"}
+      # methods which do not require the transpose
+      if (A_is_numeric)
+        Afun = @(x) A * x;
+      endif
+      if (M1_is_numeric)
+        M1fun = @(x) M1 \ x;
+      endif
+      if (M2_is_numeric)
+        M2fun = @(x) M2 \ x;
+      endif
+    case {"bicg"}
+      # methods which do require the transpose
+      if (A_is_numeric)
+        Afun = @(x, trans) A_sub (A, x, trans);
+      endif
+      if (M1_is_numeric)
+        M1fun = @(x, trans) M_sub (M1, x, trans);
+      endif
+      if (M2_is_numeric)
+        M2fun = @(x, trans) M_sub (M2, x, trans);
+      endif
+    otherwise
+      error (["__alltohandles__: unknown method: ", solver_name]);
+  endswitch
+endfunction
+
+function y = A_sub (A, x, trans)
+  if (strcmp (trans, "transp"))
+    y = A' * x;
+  else
+    y = A * x;
+  endif
+endfunction
+
+function y = M_sub (M, x, trans)
+  if (strcmp (trans, "transp"))
+    y = M' \ x;
+  else
+    y = M \ x;
+  endif
+endfunction
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/sparse/private/__default__input__.m	Sun Mar 11 07:29:04 2018 +0900
@@ -0,0 +1,52 @@
+## Copyright (C) 2016 Cristiano Dorigo, Octave Arena
+##
+## This file is part of Octave.
+##
+## Octave is free software; you can redistribute it and/or modify it
+## under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 3 of the License, or
+## (at your option) any later version.
+##
+## Octave is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with Octave; see the file COPYING.  If not,
+## see <http://www.gnu.org/licenses/>.
+
+## -*- texinfo -*-
+## @deftypefn  {} {@var{[varargout]} =} __default__input__ (@var{def_val}, @var{varargin})
+## Check if the arguments in input of a function are empty or missing
+## and in such cases sets up them in default values.
+##
+## The input argoments are:
+## @itemize @minus
+## @item @var{def_val} is a cell array that contains the values to use
+## as  default.
+## @item @var{varargin} are the input argument
+## @end itemize
+##
+## The output argoments:
+## @itemize @minus
+## @item @var{varargout} all the input argument with filled the empty
+## or missing paramenters.
+##
+## @end itemize
+##
+## @end deftypefn
+
+
+function [varargout] = __default__input__ (def_val, varargin)
+
+  m = length (def_val);
+  n = length (varargin);
+
+  for i = 1:m
+    if (n < i || isempty (varargin {i}))
+      varargout {i} = def_val {i};
+    else
+      varargout {i} = varargin {i};
+    endif
+  endfor
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/sparse/tfqmr.m	Sun Mar 11 07:29:04 2018 +0900
@@ -0,0 +1,505 @@
+## Copyright (C) 2016 Cristiano Dorigo, Octave Arena
+##
+## This file is part of Octave.
+##
+## Octave is free software; you can redistribute it and/or modify it
+## under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 3 of the License, or (at
+## your option) any later version.
+##
+## Octave is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with Octave; see the file COPYING.  If not, see
+## <http://www.gnu.org/licenses/>.
+
+## -*- texinfo -*-
+## @deftypefn  {} {@var{x} =} tfqmr (@var{A}, @var{b}, @var{tol}, @var{maxit}, @var{M1}, @var{M2}, @var{x0}, @dots{})
+## @deftypefnx {} {@var{x} =} tfqmr (@var{A}, @var{b}, @var{tol}, @var{maxit}, @var{M}, [], @var{x0}, @dots{})
+## @deftypefnx {} {[@var{x}, @var{flag}, @var{relres}, @var{iter}, @var{resvec}] =} tfqmr (@var{A}, @var{b}, @dots{})
+## Solve @code{A x = b} using the Transpose-Tree qmr method, based on the cgs.
+##
+## The input parameters are:
+##
+## @itemize @minus
+##
+## @item @var{A} is the matrix of the linear system and it must be square.
+## @var{A} can be passed as a matrix, function handle, or inline
+## function @code{Afun} such that @code{Afun(x) = A * x}. Additional
+## parameters to @code{Afun} are passed after @var{x0}.
+##
+## @item @var{b} is the right hand side vector. It must be a column vector
+## with the same number of rows as @var{A}.
+##
+## @item @var{tol} is the relative tolerance, if not given or set to [] the
+## default value 1e-6 is used.
+##
+## @item @var{maxit} the maximum number of outer iterations, if not given or
+## set to [] the default value @code{min (20, numel (b))} is used. To be
+## compatible, since the method as different behaviours in the iteration
+## number is odd or even, is considered as iteration in @code{tfqmr} the
+## entire odd-even cycle. That is, to make an entire iteration, the algorithm
+## performs two sub-iterations: the odd one and the even one.
+##
+## @item @var{M1}, @var{M2} are the preconditioners. The preconditioner
+## @var{M} is given as @code{M = M1 * M2}.
+## Both @var{M1} and @var{M2} can be passed as a matrix or as a function
+## handle or inline function @code{g} such that @code{g(x) = M1 \ x} or
+## @code {g(x) = M2 \ x}.
+## The technique used is the rigth-preconditioning, i.e. it is solved
+## @code{A*inv(M)*y = b} and then @code{x = inv(M)*y}, instead of
+## @code{A x = b}.
+##
+## @item @var{x0} the initial guess, if not given or set to [] the default
+## value @code{zeros (size (b))} is used.
+##
+## @end itemize
+##
+## The arguments which follow @var{x0} are treated as parameters, and passed in
+## a proper way to any of the functions (@var{A} or @var{M}) which are passed
+## to @code{tfqmr}.
+##
+## The output parameters are:
+##
+## @itemize @minus
+##
+## @item @var{x} is the approximation computed. If the method doesn't
+## converge then it is the iterated with the minimum residual.
+##
+## @item @var{flag} indicates the exit status:
+##
+## @itemize @minus
+## @item 0: iteration converged to the within the chosen tolerance
+##
+## @item 1: the maximum number of iterations was reached before convergence
+##
+## @item 2: the preconditioner matrix is singular
+##
+## @item 3: the algorithm reached stagnation
+##
+## @item 4: the algorithm can't continue due to a division by zero
+## @end itemize
+##
+## @item @var{relres} is the relative residual obtained as
+## @code{(@var{A}*@var{x}-@var{b}) / @code{norm (@var{b})}}.
+##
+## @item @var{iter} is the iteration which @var{x} is
+## computed.
+##
+## @item @var{resvec} is a vector containing the residual at each iteration
+## (including @code{norm (b - A x0)}).
+## Doing @code{length (@var{resvec}) - 1} is possible to see the
+## total number of iterations performed.
+##
+## @end itemize
+##
+## Let us consider a trivial problem with a tridiagonal matrix
+##
+## @example
+## @group
+## n = 20;
+## A = toeplitz (sparse ([1, 1], [1, 2], [2, 1] * n ^ 2, 1, n))  + ...
+##     toeplitz (sparse (1, 2, -1, 1, n) * n / 2, ...
+##     sparse (1, 2, 1, 1, n) * n / 2);
+## b = A * ones (n, 1);
+## restart = 5;
+## [M1, M2] = ilu (A); # in this tridiag case it corresponds to chol (A)'
+## M = M1 * M2;
+## Afun = @@(x) A * x;
+## Mfun = @@(x) M \ x;
+## M1fun = @@(x) M1 \ x;
+## M2fun = @@(x) M2 \ x;
+## @end group
+## @end example
+##
+## @sc{Example 1:} simplest usage of @code{tfqmr}
+##
+## @example
+## x = tfqmr (A, b, [], n)
+## @end example
+##
+## @sc{Example 2:} @code{tfqmr} with a function which computes
+## @code{@var{A} * @var{x}}
+##
+## @example
+## x = tfqmr (Afun, b, [], n)
+## @end example
+##
+## @sc{Example 3:} @code{tfqmr} with a preconditioner matrix @var{M}
+##
+## @example
+## x = tfqmr (A, b, [], 1e-06, n, M)
+## @end example
+##
+## @sc{Example 4:} @code{tfqmr} with a function as preconditioner
+##
+## @example
+## x = tfqmr (Afun, b, 1e-6, n, Mfun)
+## @end example
+##
+## @sc{Example 5:} @code{tfqmr} with preconditioner matrices @var{M1}
+## and @var{M2}
+##
+## @example
+## x = tfqmr (A, b, [], 1e-6, n, M1, M2)
+## @end example
+##
+## @sc{Example 6:} @code{tfmqr} with functions as preconditioners
+##
+## @example
+## x = tfqmr (Afun, b, 1e-6, n, M1fun, M2fun)
+## @end example
+##
+## @sc{Example 7:} @code{tfqmr} with as input a function requiring an argument
+##
+## @example
+## @group
+## function y = Ap (A, x, z) # compute A^z * x
+##    y = x;
+##    for i = 1:z
+##      y = A * y;
+##    endfor
+##  endfunction
+## Apfun = @@(x, string, p) Ap (A, x, string, p);
+## x = tfqmr (Apfun, b, [], [], [], [], [], 2);
+## @end group
+## @end example
+##
+## @sc{Example 8:} explicit example to show that @code{tfqmr} uses a
+## right preconditioner
+##
+## @example
+## @group
+## [M1, M2] = ilu (A + 0.3 * eye (n)); # factorization of A perturbed
+## M = M1 * M2;
+##
+## ## reference solution computed by tfqmr after one iteration
+## [x_ref, fl] = tfqmr (A, b, [], 1, M)
+##
+## ## rigth preconditioning
+## [y, fl] = tfqmr (A / M, b, [], 1)
+## x = M \ y # compare x and x_ref
+##
+## @end group
+## @end example
+##
+## References:
+##
+## @enumerate
+##
+## @item @nospell{Y. Saad}, @cite{Iterative Methods for Sparse Linear Systems},
+## Second edition, 2003, SIAM
+##
+## @end enumerate
+##
+## @seealso{bicg, bicgstab, cgs, gmres, pcg, qmr, pcr}
+##
+## @end deftypefn
+
+function [x_min, flag, relres, iter_min, resvec] = ...
+         tfqmr (A, b, tol = [], maxit = [], M1 = [], M2 = [], ...
+                x0 = [], varargin)
+
+  [Afun, M1fun, M2fun] = __alltohandles__ (A, b, M1, M2, "tfqmr");
+
+  [tol, maxit, x0] = __default__input__ ({1e-06, 2 * min(20, rows (b)), ...
+                                          zeros(rows (b), 1)}, tol, ...
+                                         maxit, x0);
+
+  maxit = 2 * maxit; # To be compatible, since iteration = odd+even ones
+
+  norm_b = norm (b, 2);
+  if (norm_b == 0)
+    if (nargout < 2)
+      printf("The right hand side vector is all zero so tfqmr \n")
+      printf ("returned an all zero solution without iterating.\n")
+    endif
+    x_min = zeros (numel (b), 1);
+    iter_min = 0;
+    flag = 0;
+    resvec = 0;
+    relres = 0;
+    return
+  endif
+
+  x = x_pr = x_min = x0;
+  iter = iter_min = m = 0;
+  resvec = zeros (maxit, 1);
+  flag = 1;
+
+  w = u = r = r_star = b - feval (Afun, x0, varargin{:});
+  rho_1 = (r_star' * r);
+  d = 0;
+  tau = norm (r, 2);
+  theta = eta = 0;
+  resvec (1, 1) = norm (r, 2);
+  it = 1;
+
+  try
+    warning("error", "Octave:singular-matrix", "local");
+    u_hat = feval (M1fun, u, varargin{:});
+    u_hat = feval (M2fun, u_hat, varargin{:});
+    v = feval (Afun, u_hat, varargin{:});
+  catch
+    flag = 2;
+  end_try_catch
+  while ((flag != 2) && (iter < maxit) && ...
+         (resvec (iter + 1, 1) >= norm_b * tol))
+    if (it > 0) # iter is even
+      v_r = r_star' * v; # inner prod between r_star and v
+      if (v_r == 0)
+        ## Essentially the next iteration doesn't change x,
+        ## and the iter after this will have a division by zero
+        flag = 4;
+        break
+      endif
+      alpha = rho_1 / v_r;
+      u_1 = u - alpha * v;  # u at the after iteration
+    endif
+    u_hat = feval (M1fun, u, varargin{:});
+    u_hat = feval (M2fun, u_hat, varargin{:});
+    w -= alpha * feval (Afun, u_hat, varargin{:});
+    d = u_hat + ((theta * theta) / alpha) * eta * d;
+    theta = norm (w, 2) / tau;
+    c = 1 / sqrt (1 + theta * theta);
+    tau *= theta * c;
+    eta = (c * c) * alpha;
+    x += eta * d;
+    r -= eta * feval (Afun, d, varargin{:});
+    if (it < 0) # iter is odd
+      rho_2 = rho_1;
+      rho_1 = (r_star' * w);
+      if (rho_1 == 0)
+        ## Essentially the next iteration doesn't change x,
+        ## and the iter after this will have a division by zero
+        flag = 4;
+        break
+      endif
+      beta = rho_1 / rho_2;
+      u_1 = w + beta * u; # u at the after iteration
+      u1_hat = feval (M1fun, u_1, varargin{:});
+      u1_hat = feval (M2fun, u1_hat, varargin{:});
+      v = feval (Afun, u1_hat, varargin{:}) + ...
+          beta * (feval (Afun, u_hat, varargin{:}) + beta * v);
+    endif
+    u = u_1;
+    iter += 1;
+    resvec (iter + 1, 1) = norm (r, 2);
+    if (resvec (iter + 1, 1) <= resvec (iter_min + 1, 1))
+      ## iter with min residual
+      x_min = x;
+      iter_min = iter;
+    endif
+    if (norm (x_pr - x) <= norm (x) * eps)
+      flag = 3; # Stagnation
+      break
+    endif
+    x_pr = x;
+    it = -it;
+  endwhile
+  resvec = resvec (1: (iter + 1));
+
+  relres = resvec (iter_min + 1) / norm (b);
+  iter_min = floor(iter_min / 2); # compatibility, since it
+                                  # makes two times the effective iterations
+
+  if (relres <= tol)
+    flag = 0;
+  endif
+
+  if (nargout < 2) # Output strings
+    switch (flag)
+      case {0}
+        printf ("tfqmr converged at iteration %i ", iter_min);
+        printf ("to a solution with relative residual %e\n", relres);
+      case {1}
+        printf ("tfqmr stopped at iteration %i ", iter);
+        printf ("without converging to the desired tolerance %e\n", tol);
+        printf ("because the maximum number of iterations was reached.\n");
+        printf ("The iterate returned (number %i) ", iter_min);
+        printf ("has relative residual %e\n", relres);
+      case {2}
+        printf ("tfqmr stopped at iteration %i ", iter);
+        printf ("without converging to the desired tolerance %e\n", tol);
+        printf ("because the preconditioner matrix is singular.\n");
+        printf ("The iterate returned (number %i) ", iter_min);
+        printf ("has relative residual %e\n", relres);
+      case {3}
+        printf ("tfqmr stopped at iteration %i ", iter);
+        printf ("without converging to the desired tolerance %e\n", tol);
+        printf ("because the method stagnated.\n");
+        printf ("The iterate returned (number %i) ", iter_min);
+        printf ("has relative residual %e\n", relres);
+      case {4}
+        printf ("tfqmr stopped at iteration %i ", iter);
+        printf ("without converging to the desired tolerance %e\n", tol);
+        printf ("because the method can't continue.\n");
+        printf ("The iterate returned (number %i) ", iter_min);
+        printf ("has relative residual %e\n", relres);
+    endswitch
+  endif
+endfunction
+%!test
+%! ## Check that all type of inputs work
+%! A = toeplitz (sparse ([2, 1, 0, 0, 0]), sparse ([2, -1, 0, 0, 0]));
+%! b = sum (A, 2);
+%! M1 = diag (sqrt (diag (A)));
+%! M2 = M1;
+%! maxit = 10;
+%! Afun = @(z) A * z;
+%! M1_fun = @(z) M1 \ z;
+%! M2_fun = @(z) M2 \ z;
+%! [x, flag] = tfqmr (A,b);
+%! assert (flag, 0);
+%! [x, flag] = tfqmr (A, b, [], maxit, M1, M2);
+%! assert (flag, 0);
+%! [x, flag] = tfqmr (A, b, [], maxit, M1_fun, M2_fun);
+%! assert (flag, 0);
+%! [x, flag] = tfqmr (A, b, [], maxit, M1_fun, M2);
+%! assert (flag, 0);
+%! [x, flag] = tfqmr (A, b, [], maxit, M1, M2_fun);
+%! assert (flag, 0);
+%! [x, flag] = tfqmr (Afun, b);
+%! assert (flag, 0);
+%! [x, flag] = tfqmr (Afun, b, [], maxit, M1, M2);
+%! assert (flag, 0);
+%! [x, flag] = tfqmr (Afun, b, [], maxit, M1_fun, M2);
+%! assert (flag, 0);
+%! [x, flag] = tfqmr (Afun, b, [], maxit, M1, M2_fun);
+%! assert (flag, 0);
+%! [x, flag] = tfqmr (Afun, b, [], maxit, M1_fun, M2_fun);
+%! assert (flag, 0);
+
+%!shared A, b, n, M1, M2
+%!
+%!test
+%! n = 100;
+%! A = spdiags ([-2*ones(n,1) 4*ones(n,1) -ones(n,1)], -1:1, n, n);
+%! b = sum (A, 2);
+%! tol = 1e-8;
+%! maxit = 15;
+%! M1 = spdiags ([ones(n,1)/(-2) ones(n,1)],-1:0, n, n);
+%! M2 = spdiags ([4*ones(n,1) -ones(n,1)], 0:1, n, n);
+%! [x, flag, relres, iter, resvec] = tfqmr (A, b, tol, maxit, M1, M2);
+%! assert (x, ones (size (b)), 1e-7);
+%!
+%!test
+%!function y = afun (x, a)
+%!  y = a * x;
+%!endfunction
+%!
+%! tol = 1e-8;
+%! maxit = 15;
+%!
+%! [x, flag, relres, iter, resvec] = tfqmr (@(x) afun (x, A), b,
+%!                                             tol, maxit, M1, M2);
+%! assert (x, ones (size (b)), 1e-7);
+
+%!test
+%! n = 10;
+%! tol = 1e-8;
+%! a = (2 * sprand (n, n, .1) - 1) + 1i * (2 * sprand (n, n, .1) - 1);
+%! A = a + 2 * eye (n);
+%! b = sum (A, 2);
+%! [x, flag, relres, iter, resvec] = tfqmr (A, b, tol, [], diag (diag (A)));
+%! assert (x, ones (size (b)), 1e-7);
+
+%!test
+%! ## Solve complex linear system
+%! A = [1 + 1i, 1 + 1i; 2 - 1i, 2 + 1i];
+%! b = A * [1; 1];
+%! [x, flag, relres, iter, resvec] = tfqmr (A, b, [], 3);
+%! assert (x, [1; 1], 1e-6);
+
+%!test
+%! A = diag (1:50);
+%! A (1,50) = 10000;
+%! b = ones (50,1);
+%! [x, flag, relres, iter, resvec] = tfqmr (A, b, [], 100);
+%! assert (flag, 0)
+%! assert (x, A \ b, 1e-05)
+%! ## Detects a singular preconditioner
+%! M = ones (50);
+%! M(1, 1) = 0;
+%! [x, flag] = tfqmr (A, b, [], 100, M);
+%! assert (flag, 2)
+
+%!test
+%! A = single (1);
+%! b = 1;
+%! [x, flag] = tfqmr (A, b);
+%! assert (class (x), "single")
+
+%!test
+%! A = 1;
+%! b = single (1);
+%! [x, flag] = tfqmr (A, b);
+%! assert (class (x), "single")
+
+%!test
+%! A = single (1);
+%! b = single (1);
+%! [x, flag] = tfqmr (A, b);
+%! assert (class (x), "single")
+
+%!test
+%!function y = Afun (x)
+%!   A = toeplitz ([2, 1, 0, 0], [2, -1, 0, 0]);
+%!   y = A * x;
+%!endfunction
+%! [x, flag] = tfqmr ("Afun", [1; 2; 2; 3]);
+%! assert (x, ones(4, 1), 1e-6)
+
+%!test # unpreconditioned residual
+%! A = toeplitz (sparse ([2, 1, 0, 0, 0]), sparse ([2, -1, 0, 0, 0]));
+%! b = sum (A, 2);
+%! M = magic (5);
+%! [x, flag, relres] = tfqmr (A, b, [], 3, M);
+%! assert (relres, norm (b - A * x) / norm (b), 8 * eps)
+
+%!demo # simplest use
+%! n = 20;
+%! A = toeplitz (sparse ([1, 1], [1, 2], [2, 1] * n ^ 2, 1, n))  + ...
+%!     toeplitz (sparse (1, 2, -1, 1, n) * n / 2, ...
+%!     sparse (1, 2, 1, 1, n) * n / 2);
+%! b = A * ones (n, 1);
+%! [M1, M2] = ilu (A + 0.1 * eye (n));
+%! M = M1 * M2;
+%! x = tfqmr (A, b, [], n);
+%! Afun = @(x) A * x;
+%! x = tfqmr (Afun, b, [], n);
+%! x = tfqmr (A, b, 1e-6, n, M);
+%! x = tfqmr (A, b, 1e-6, n, M1, M2);
+%! Mfun = @(z) M \ z;
+%! x = tfqmr (Afun, b, 1e-6, n, Mfun);
+%! M1fun = @(z) M1 \ z;
+%! M2fun = @(z) M2 \ z;
+%! x = tfqmr (Afun, b, 1e-6, n, M1fun, M2fun);
+%! function y = Ap (A, x, z) # compute A^z * x or (A^z)' * x
+%!    y = x;
+%!    for i = 1:z
+%!      y = A * y;
+%!    endfor
+%!  endfunction
+%! Afun = @(x, p) Ap (A, x, p);
+%! x = tfqmr (Afun, b, [], 2*n, [], [], [], 2); # solution of A^2 * x = b
+
+%!demo
+%! n = 10;
+%! A = toeplitz (sparse ([1, 1], [1, 2], [2, 1] * n ^ 2, 1, n))  + ...
+%!     toeplitz (sparse (1, 2, -1, 1, n) * n / 2, ...
+%!     sparse (1, 2, 1, 1, n) * n / 2);
+%! b = A * ones (n, 1);
+%! [M1, M2] = ilu (A + 0.3 * eye (n)); # factorization of A perturbed
+%! M = M1 * M2;
+%!
+%! ## reference solution computed by tfqmr after one iteration
+%! [x_ref, fl] = tfqmr (A, b, [], 1, M);
+%! x_ref
+%!
+%! ## right preconditioning
+%! [y, fl] = tfqmr (A / M, b, [], 1);
+%! x = M \ y # compare x and x_ref