cmdla/11-09/SDG.m

function [ x , status ] = SDG( f , varargin )

%function [ x , status ] = SDG( f , x , astart , eps , MaxFeval , m1 , ... 
%                               m2 , tau , sfgrd , MInf , mina )
%
% Apply the classical Steepest Descent algorithm for the minimization of
% the provided function f, which must have the following interface:
%
%   [ v , g ] = f( x )
%
% Input:
%
% - x is either a [ n x 1 ] real (column) vector denoting the input of
%   f(), or [] (empty).
%
% Output:
%
% - v (real, scalar): if x == [] this is the best known lower bound on
%   the unconstrained global optimum of f(); it can be -Inf if either f()
%   is not bounded below, or no such information is available. If x ~= []
%   then v = f( x ).
%
% - g (real, [ n x 1 ] real vector): this also depends on x. if x == []
%   this is the standard starting point from which the algorithm should
%   start, otherwise it is the gradient of f() at x (or a subgradient if
%   f() is not differentiable at x, which it should not be if you are
%   applying the gradient method to it).
%
% The other [optional] input parameters are:
%
% - x (either [ n x 1 ] real vector or [], default []): starting point.
%   If x == [], the default starting point provided by f() is used.
%
% - astart (real scalar, optional, default value 1): if it is > 0, then it
%   is used as the starting value of alpha in the line search, be it the
%   Armijo-Wolfe or the Backtracking one. Otherwise, it is taken to mean
%   that no line search is to be performed, i.e., a fixed step approach
%   has to be used with step = - astart (hence, astart == 0 is an invald
%   setting in either case).
%
% - eps (real scalar, optional, default value 1e-6): the accuracy in the 
%   stopping criterion: the algorithm is stopped when the norm of the
%   gradient is less than or equal to eps. If a negative value is provided,
%   this is used in a *relative* stopping criterion: the algorithm is
%   stopped when the norm of the gradient is less than or equal to
%   (- eps) * || norm of the first gradient ||.
%
% - MaxFeval (integer scalar, optional, default value 1000): the maximum
%   number of function evaluations (hence, iterations will be not more than
%   MaxFeval because at each iteration at least a function evaluation is
%   performed, possibly more due to the line search).
%
% - m1 (real scalar, optional, must be in ( 0 , 1 ), default value 1e-3): 
%   parameter of the Armijo condition (sufficient decrease) in the line
%   search
%
% - m2 (real scalar, optional, default value 0.9): typically the parameter
%   of the Wolfe condition (sufficient derivative increase) in the line
%   search. It should to be in ( 0 , 1 ); if not, it is taken to mean that
%   the simpler Backtracking line search should be used instead
%
% - tau (real scalar, optional, default value 0.9): scaling parameter for
%   the line search. In the Armijo-Wolfe line search it is used in the
%   first phase to identify a point where the Armijo condition is not
%   satisfied or the derivative is positive by divding the current
%   value (starting with astart, see above) by tau (which is < 1, hence it
%   is increased). In the Backtracking line search, each time the step is
%   multiplied by tau (hence it is decreased).
%
% - sfgrd (real scalar, optional, default value 0.01): safeguard parameter
%   for the line search. to avoid numerical problems that can occur with
%   the quadratic interpolation if the derivative at one endpoint is too
%   large w.r.t. the one at the other (which leads to choosing a point
%   extremely near to the other endpoint), a *safeguarded* version of
%   interpolation is used whereby the new point is chosen in the interval
%   [ as * ( 1 + sfgrd ) , am * ( 1 - sfgrd ) ], being [ as , am ] the
%   current interval, whatever quadratic interpolation says. If you
%   experiemce problems with the line search taking too many iterations to
%   converge at "nasty" points, try to increase this
%
% - MInf (real scalar, optional, default value -Inf): if the algorithm
%   determines a value for f() <= MInf this is taken as an indication that
%   the problem is unbounded below and computation is stopped
%   (a "finite -Inf").
%
% - mina (real scalar, optional, default value 1e-16): if the algorithm
%   determines a stepsize value <= mina, this is taken as an indication
%   that something has gone wrong (the gradient is not a direction of
%   descent, so maybe the function is not differentiable) and computation
%   is stopped. It is legal to take mina = 0, thereby in fact skipping this
%   test.
%
% Output:
%
% - x ([ n x 1 ] real column vector): the best solution found so far
%
% - status (string): a string describing the status of the algorithm at
%   termination 
%
%   = 'optimal': the algorithm terminated having proven that x is a(n
%     approximately) optimal solution, i.e., the norm of the gradient at x
%     is less than the required threshold
%
%   = 'unbounded': the algorithm has determined an extrenely large negative
%     value for f() that is taken as an indication that the problem is
%     unbounded below (a "finite -Inf", see MInf above)
%
%   = 'stopped': the algorithm terminated having exhausted the maximum
%     number of iterations: x is the bast solution found so far, but not
%     necessarily the optimal one
%
%   = 'error': the algorithm found a numerical error that prevents it from
%     continuing optimization (see mina above)
%
%{
 =======================================
 Author: Antonio Frangioni
 Date: 27-04-23
 Version 1.30
 Copyright Antonio Frangioni
 =======================================
%}

Plotf = 1;
% 0 = nothing is plotted
% 1 = the level sets of f and the trajectory are plotted (when n = 2)
% 2 = the function value / gap are plotted, iteration-wise
% 3 = the function value / gap are plotted, function-evaluation-wise

Interactive = true;  % if we pause at every iteration

if Plotf > 1
   if Plotf == 2
      MaxIter = 200;   % expected number of iterations for the gap plot
   else
      MaxIter = 1000;  % expected number of iterations for the gap plot
   end    
   gap = [];
end

% reading and checking input- - - - - - - - - - - - - - - - - - - - - - - -
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

if ~ isa( f , 'function_handle' )
   error( 'f not a function' );
end

if isempty( varargin ) || isempty( varargin{ 1 } )
   [ fStar , x ] = f( [] );
else
   x = varargin{ 1 };
   if ~ isreal( x )
      error( 'x not a real vector' );
   end

   if size( x , 2 ) ~= 1
      error( 'x is not a (column) vector' );
   end
   
   fStar = f( [] );
end

n = size( x , 1 );

if length( varargin ) > 1
   astart = varargin{ 2 };
   if ~ isscalar( astart )
      error( 'astart is not a real scalar' );
   end
   if astart == 0
      error( 'astart must be != 0' );
   end
else
   astart = 1;
end

if length( varargin ) > 2
   eps = varargin{ 3 };
   if ~ isreal( eps ) || ~ isscalar( eps )
      error( 'eps is not a real scalar' );
   end
else
   eps = 1e-6;
end

if length( varargin ) > 3
   MaxFeval = round( varargin{ 4 } );
   if ~ isscalar( MaxFeval )
      error( 'MaxFeval is not an integer scalar' );
   end
else
   MaxFeval = 1000;
end

if length( varargin ) > 4
   m1 = varargin{ 5 };
   if ~ isscalar( m1 )
      error( 'm1 is not a real scalar' );
   end
   if m1 <= 0 || m1 >= 1
      error( 'm1 is not in ( 0 , 1 )' );
   end       
else
   m1 = 1e-3;
end

if length( varargin ) > 5
   m2 = varargin{ 6 };
   if ~ isscalar( m1 )
      error( 'm2 is not a real scalar' );
   end
else
   m2 = 0.9;
end

AWLS = ( m2 > 0 && m2 < 1 );

if length( varargin ) > 6
   tau = varargin{ 7 };
   if ~ isscalar( tau )
      error( 'tau is not a real scalar' );
   end
   if tau <= 0 || tau >= 1
      error( 'tau is not in ( 0 , 1 )' );
   end       
else
   tau = 0.9;
end

if length( varargin ) > 7
   sfgrd = varargin{ 8 };
   if ~ isscalar( sfgrd )
      error( 'sfgrd is not a real scalar' );
   end
   if sfgrd <= 0 || sfgrd >= 1
      error( 'sfgrd is not in ( 0 , 1 )' );
   end
else
   sfgrd = 0.01;
end

if length( varargin ) > 8
   MInf = varargin{ 9 };
   if ~ isscalar( MInf )
      error( 'MInf is not a real scalar' );
   end
else
   MInf = - Inf;
end

if length( varargin ) > 9
   mina = varargin{ 10 };
   if ~ isscalar( mina )
      error( 'mina is not a real scalar' );
   end
   if mina < 0
      error( 'mina is < 0' );
   end       
else
   mina = 1e-16;
end

if Plotf > 1
   xlim( [ 0 MaxIter ] );
   ax = gca;
   ax.FontSize = 16;
   ax.Position = [ 0.03 0.07 0.95 0.92 ];
   ax.Toolbar.Visible = 'off';
end

% "global" variables- - - - - - - - - - - - - - - - - - - - - - - - - - - -
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

lastx = zeros( n , 1 );  % last point visited in the line search
lastg = zeros( n , 1 );  % gradient of lastx
feval = 1;               % f() evaluations count ("common" with LSs)

% initializations - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

fprintf( 'Gradient method\n');
if fStar > - Inf
   fprintf( 'feval\trel gap\t\t|| g(x) ||\trate\t');
   prevv = Inf;
else
   fprintf( 'feval\tf(x)\t\t\t|| g(x) ||');
end
if astart > 0
   fprintf( '\tls feval\ta*' );
end
fprintf( '\n\n' );

% compute first f-value and gradient in x^0 - - - - - - - - - - - - - - - -

g = 0;
v = f2phi( 0 );
g = lastg;

% compute norm of the (first) gradient- - - - - - - - - - - - - - - - - - -

ng = norm( g );
if eps < 0
   ng0 = - ng;  % norm of first subgradient: why is there a "-"? ;-)
else
   ng0 = 1;     % un-scaled stopping criterion
end

% main loop - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

while true
 
    % output statistics & plot gap/f-values - - - - - - - - - - - - - - - -
  
    if fStar > - Inf
       gapk = ( v - fStar ) / max( [ abs( fStar ) 1 ] );

       fprintf( '%4d\t%1.4e\t%1.4e' , feval , gapk , ng );
       if prevv < Inf
          fprintf( '\t%1.4e' , ( v - fStar ) / ( prevv - fStar ) );
       else
          fprintf( '          \t  ' );           
       end
       prevv = v;

       if Plotf > 1
          if Plotf >= 2
             gap( end + 1 ) = gapk;
          end
          semilogy( gap , 'Color' , 'k' , 'LineWidth' , 2 );
          if Plotf == 2
             ylim( [ 1e-15 1e+1 ] );
          else
             ylim( [ 1e-15 1e+4 ] );
          end
          %drawnow;
       end
    else
       fprintf( '%4d\t%1.8e\t\t%1.4e' , feval , v , ng );

       if Plotf > 1
          if Plotf >= 2
             gap( end + 1 ) = v;
          end
          plot( gap , 'Color' , 'k' , 'LineWidth' , 2 );
          %drawnow;
       end
    end

    % stopping criteria - - - - - - - - - - - - - - - - - - - - - - - - - -

    if ng <= eps * ng0
       status = 'optimal';
       fprintf( '\n' );
       break;
    end
    
    if feval > MaxFeval
       status = 'stopped';
       fprintf( '\n' );
       break;
    end
    
    % compute step size - - - - - - - - - - - - - - - - - - - - - - - - - -

    phip0 = - ng * ng;

    if astart < 0
       % fixed-step approach

       lastx = x + astart * g;
       [ v , lastg ] = f( lastx );
       feval = feval + 1;
    else
       % line-search approach, either Armijo-Wolfe or Backtracking

       if AWLS
          [ a , v ] = ArmijoWolfeLS( v , phip0 , astart , m1 , m2 , tau );
       else
          [ a , v ] = BacktrackingLS( v , phip0 , astart , m1 , tau );
       end
    end

    % output statistics - - - - - - - - - - - - - - - - - - - - - - - - - -

    if astart > 0
       fprintf( '\t%1.4e\n' , a );

       if a <= mina
          status = 'error';
          fprintf( '\n' );
          break;
       end
    else
       fprintf( '\n' );
    end

    
    if v <= MInf
       status = 'unbounded';
       fprintf( '\n' );
       break;
    end

    % compute new point - - - - - - - - - - - - - - - - - - - - - - - - - -

    % possibly plot the trajectory
    if n == 2 && Plotf == 1
       PXY = [ x ,  lastx ];
       line( 'XData' , PXY( 1 , : ) , 'YData' , PXY( 2 , : ) , ...
             'LineStyle' , '-' , 'LineWidth' , 2 ,  'Marker' , 'o' , ...
             'Color' , [ 0 0 0 ] );
    end
    
    x = lastx;
  
    % update gradient - - - - - - - - - - - - - - - - - - - - - - - - - - -

    g = lastg;
    ng = norm( g );

    % iterate - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    if Interactive
       pause;
    end
end

% end of main loop- - - - - - - - - - - - - - - - - - - - - - - - - - - - -
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
% inner functions - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

function [ phi , varargout ] = f2phi( alpha )
%
% computes and returns the value of the tomography at alpha
%
%    phi( alpha ) = f( x - alpha * g )
%
% if Plotf > 2 saves the data in gap() for plotting
%
% if the second output parameter is required, put there the derivative
% of the tomography in alpha
%
%    phi'( alpha ) = < \nabla f( x - alpha * g ) , - g >
%
% saves the point in lastx, the gradient in lastg and increases feval

   lastx = x - alpha * g;
   [ phi , lastg ] = f( lastx );

   if Plotf > 2
      if fStar > - Inf
         gap( end + 1 ) = ( phi - fStar ) / max( [ abs( fStar ) 1 ] );
      else
         gap( end + 1 ) = phi;
      end
   end
   
   if nargout > 1
      varargout{ 1 } = - g' * lastg;
   end
   
   feval = feval + 1;
end

% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

function [ a , phia ] = ArmijoWolfeLS( phi0 , phip0 , as , m1 , m2 , tau )

% performs an Armijo-Wolfe Line Search.
%
% Inputs:
%
% - phi0 = phi( 0 )
%
% - phip0 = phi'( 0 ) (< 0)
%
% - as (> 0) is the first value to be tested: if the Armijo condition
%
%     phi( as ) <= phi0 + m1 * as * phip0
%
%   is satisfied but the Wolfe condition is not, which means that the
%   derivative in as is still negative, which means that longer steps
%   might be possible), then as is divided by tau < 1 (hence it is
%   increased) until this does not happen any longer
%
% - m1 (> 0 and < 1, typically small, like 0.01) is the parameter of
%   the Armijo condition
%
% - m2 (> m1 > 0, typically large, like 0.9) is the parameter of the
%   Wolfe condition
%
% - tau (> 0 and < 1) is the increasing coefficient for the first phase
%   (extrapolation)
%
% Outputs:
%
% - a is the "optimal" step
%
% - phia = phi( a ) (the "optimal" f-value)

lsiter = 1;  % count iterations of first phase
while feval <= MaxFeval 
   [ phia , phips ] = f2phi( as );    % compute phi( a ) and phi'( a )

   if phia > phi0 + m1 * as * phip0   % Armijo not satisfied
      break;
   end
 
   if phips >= m2 * phip0             % Wolfe satisfied
      fprintf( ' %2d   ' , lsiter );
      a = as;
      return;                         % Armijo + Wolfe satisfied, done
   end
   if phips >= 0  % derivative is positive, break
      break;
   end
   as = as / tau;
   lsiter = lsiter + 1;
end    

fprintf( ' %2d ' , lsiter );
lsiter = 1;  % count iterations of second phase

am = 0;
a = as;
phipm = phip0;
while ( feval <= MaxFeval ) && ( ( as - am ) ) > abs( mina ) && ...
      ( abs( phips ) > 1e-12 )

   if ( phipm < 0 ) && ( phips > 0 )
      % if the derivative in as is positive and that in am is negative,
      % then compute the new step by safeguarded quadratic interpolation
      a = ( am * phips - as * phipm ) / ( phips - phipm );
      a = max( [ am + ( as - am ) * sfgrd ...
               min( [ as - ( as - am ) * sfgrd  a ] ) ] );
   else
      a = ( as - am ) / 2;  % else just use dumb binary search
   end

   [ phia , phipa ] = f2phi( a );      % compute phi( a ) and phi'( a )

   if phia <= phi0 + m1 * as * phip0   % Armijo satisfied
      if phipa >= m2 * phip0           % Wolfe satisfied
         break;                        % Armijo + Wolfe satisfied, done
      end

      am = a;         % Armijo is satisfied but Wolfe is not, i.e., the
      phipm = phipa;  % derivative is still negative: move the left
                      % endpoint of the interval to a
   else                                % Armijo not satisfied
      as = a;         % move the right endpoint of the interval to a
      phips = phipa;
   end

   lsiter = lsiter + 1;
end

fprintf( '%2d' , lsiter );

end

% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

function [ as , phia ] = BacktrackingLS( phi0 , phip0 , as , m1 , tau )

% performs a Backtracking Line Search.
%
% phi0 = phi( 0 ), phip0 = phi'( 0 ) < 0
%
% as > 0 is the first value to be tested, which is decreased by
% multiplying it by tau < 1 until the Armijo condition with parameter
% m1 is satisfied
%
% returns the optimal step and the optimal f-value

lsiter = 1;  % count ls iterations
while feval <= MaxFeval && as > mina
   [ phia , ~ ] = f2phi( as );
   if phia <= phi0 + m1 * as * phip0  % Armijo satisfied
      break;                          % we are done
   end
   as = as * tau;
   lsiter = lsiter + 1;
end

fprintf( '\t%2d' , lsiter );

end

% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

end  % the end- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
lesson 9/11 2023-11-17 12:42:12 +01:00			`function [ x , status ] = SDG( f , varargin )`

			`%function [ x , status ] = SDG( f , x , astart , eps , MaxFeval , m1 , ...`
			`% m2 , tau , sfgrd , MInf , mina )`
			`%`
			`% Apply the classical Steepest Descent algorithm for the minimization of`
			`% the provided function f, which must have the following interface:`
			`%`
			`% [ v , g ] = f( x )`
			`%`
			`% Input:`
			`%`
			`% - x is either a [ n x 1 ] real (column) vector denoting the input of`
			`% f(), or [] (empty).`
			`%`
			`% Output:`
			`%`
			`% - v (real, scalar): if x == [] this is the best known lower bound on`
			`% the unconstrained global optimum of f(); it can be -Inf if either f()`
			`% is not bounded below, or no such information is available. If x ~= []`
			`% then v = f( x ).`
			`%`
			`% - g (real, [ n x 1 ] real vector): this also depends on x. if x == []`
			`% this is the standard starting point from which the algorithm should`
			`% start, otherwise it is the gradient of f() at x (or a subgradient if`
			`% f() is not differentiable at x, which it should not be if you are`
			`% applying the gradient method to it).`
			`%`
			`% The other [optional] input parameters are:`
			`%`
			`% - x (either [ n x 1 ] real vector or [], default []): starting point.`
			`% If x == [], the default starting point provided by f() is used.`
			`%`
			`% - astart (real scalar, optional, default value 1): if it is > 0, then it`
			`% is used as the starting value of alpha in the line search, be it the`
			`% Armijo-Wolfe or the Backtracking one. Otherwise, it is taken to mean`
			`% that no line search is to be performed, i.e., a fixed step approach`
			`% has to be used with step = - astart (hence, astart == 0 is an invald`
			`% setting in either case).`
			`%`
			`% - eps (real scalar, optional, default value 1e-6): the accuracy in the`
			`% stopping criterion: the algorithm is stopped when the norm of the`
			`% gradient is less than or equal to eps. If a negative value is provided,`
			`% this is used in a relative stopping criterion: the algorithm is`
			`% stopped when the norm of the gradient is less than or equal to`
			`% (- eps) * \|\| norm of the first gradient \|\|.`
			`%`
			`% - MaxFeval (integer scalar, optional, default value 1000): the maximum`
			`% number of function evaluations (hence, iterations will be not more than`
			`% MaxFeval because at each iteration at least a function evaluation is`
			`% performed, possibly more due to the line search).`
			`%`
			`% - m1 (real scalar, optional, must be in ( 0 , 1 ), default value 1e-3):`
			`% parameter of the Armijo condition (sufficient decrease) in the line`
			`% search`
			`%`
			`% - m2 (real scalar, optional, default value 0.9): typically the parameter`
			`% of the Wolfe condition (sufficient derivative increase) in the line`
			`% search. It should to be in ( 0 , 1 ); if not, it is taken to mean that`
			`% the simpler Backtracking line search should be used instead`
			`%`
			`% - tau (real scalar, optional, default value 0.9): scaling parameter for`
			`% the line search. In the Armijo-Wolfe line search it is used in the`
			`% first phase to identify a point where the Armijo condition is not`
			`% satisfied or the derivative is positive by divding the current`
			`% value (starting with astart, see above) by tau (which is < 1, hence it`
			`% is increased). In the Backtracking line search, each time the step is`
			`% multiplied by tau (hence it is decreased).`
			`%`
			`% - sfgrd (real scalar, optional, default value 0.01): safeguard parameter`
			`% for the line search. to avoid numerical problems that can occur with`
			`% the quadratic interpolation if the derivative at one endpoint is too`
			`% large w.r.t. the one at the other (which leads to choosing a point`
			`% extremely near to the other endpoint), a safeguarded version of`
			`% interpolation is used whereby the new point is chosen in the interval`
			`% [ as * ( 1 + sfgrd ) , am * ( 1 - sfgrd ) ], being [ as , am ] the`
			`% current interval, whatever quadratic interpolation says. If you`
			`% experiemce problems with the line search taking too many iterations to`
			`% converge at "nasty" points, try to increase this`
			`%`
			`% - MInf (real scalar, optional, default value -Inf): if the algorithm`
			`% determines a value for f() <= MInf this is taken as an indication that`
			`% the problem is unbounded below and computation is stopped`
			`% (a "finite -Inf").`
			`%`
			`% - mina (real scalar, optional, default value 1e-16): if the algorithm`
			`% determines a stepsize value <= mina, this is taken as an indication`
			`% that something has gone wrong (the gradient is not a direction of`
			`% descent, so maybe the function is not differentiable) and computation`
			`% is stopped. It is legal to take mina = 0, thereby in fact skipping this`
			`% test.`
			`%`
			`% Output:`
			`%`
			`% - x ([ n x 1 ] real column vector): the best solution found so far`
			`%`
			`% - status (string): a string describing the status of the algorithm at`
			`% termination`
			`%`
			`% = 'optimal': the algorithm terminated having proven that x is a(n`
			`% approximately) optimal solution, i.e., the norm of the gradient at x`
			`% is less than the required threshold`
			`%`
			`% = 'unbounded': the algorithm has determined an extrenely large negative`
			`% value for f() that is taken as an indication that the problem is`
			`% unbounded below (a "finite -Inf", see MInf above)`
			`%`
			`% = 'stopped': the algorithm terminated having exhausted the maximum`
			`% number of iterations: x is the bast solution found so far, but not`
			`% necessarily the optimal one`
			`%`
			`% = 'error': the algorithm found a numerical error that prevents it from`
			`% continuing optimization (see mina above)`
			`%`
			`%{`
			`=======================================`
			`Author: Antonio Frangioni`
			`Date: 27-04-23`
			`Version 1.30`
			`Copyright Antonio Frangioni`
			`=======================================`
			`%}`

			`Plotf = 1;`
			`% 0 = nothing is plotted`
			`% 1 = the level sets of f and the trajectory are plotted (when n = 2)`
			`% 2 = the function value / gap are plotted, iteration-wise`
			`% 3 = the function value / gap are plotted, function-evaluation-wise`

			`Interactive = true; % if we pause at every iteration`

			`if Plotf > 1`
			`if Plotf == 2`
			`MaxIter = 200; % expected number of iterations for the gap plot`
			`else`
			`MaxIter = 1000; % expected number of iterations for the gap plot`
			`end`
			`gap = [];`
			`end`

			`% reading and checking input- - - - - - - - - - - - - - - - - - - - - - - -`
			`% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -`

			`if ~ isa( f , 'function_handle' )`
			`error( 'f not a function' );`
			`end`

			`if isempty( varargin ) \|\| isempty( varargin{ 1 } )`
			`[ fStar , x ] = f( [] );`
			`else`
			`x = varargin{ 1 };`
			`if ~ isreal( x )`
			`error( 'x not a real vector' );`
			`end`

			`if size( x , 2 ) ~= 1`
			`error( 'x is not a (column) vector' );`
			`end`

			`fStar = f( [] );`
			`end`

			`n = size( x , 1 );`

			`if length( varargin ) > 1`
			`astart = varargin{ 2 };`
			`if ~ isscalar( astart )`
			`error( 'astart is not a real scalar' );`
			`end`
			`if astart == 0`
			`error( 'astart must be != 0' );`
			`end`
			`else`
			`astart = 1;`
			`end`

			`if length( varargin ) > 2`
			`eps = varargin{ 3 };`
			`if ~ isreal( eps ) \|\| ~ isscalar( eps )`
			`error( 'eps is not a real scalar' );`
			`end`
			`else`
			`eps = 1e-6;`
			`end`

			`if length( varargin ) > 3`
			`MaxFeval = round( varargin{ 4 } );`
			`if ~ isscalar( MaxFeval )`
			`error( 'MaxFeval is not an integer scalar' );`
			`end`
			`else`
			`MaxFeval = 1000;`
			`end`

			`if length( varargin ) > 4`
			`m1 = varargin{ 5 };`
			`if ~ isscalar( m1 )`
			`error( 'm1 is not a real scalar' );`
			`end`
			`if m1 <= 0 \|\| m1 >= 1`
			`error( 'm1 is not in ( 0 , 1 )' );`
			`end`
			`else`
			`m1 = 1e-3;`
			`end`

			`if length( varargin ) > 5`
			`m2 = varargin{ 6 };`
			`if ~ isscalar( m1 )`
			`error( 'm2 is not a real scalar' );`
			`end`
			`else`
			`m2 = 0.9;`
			`end`

			`AWLS = ( m2 > 0 && m2 < 1 );`

			`if length( varargin ) > 6`
			`tau = varargin{ 7 };`
			`if ~ isscalar( tau )`
			`error( 'tau is not a real scalar' );`
			`end`
			`if tau <= 0 \|\| tau >= 1`
			`error( 'tau is not in ( 0 , 1 )' );`
			`end`
			`else`
			`tau = 0.9;`
			`end`

			`if length( varargin ) > 7`
			`sfgrd = varargin{ 8 };`
			`if ~ isscalar( sfgrd )`
			`error( 'sfgrd is not a real scalar' );`
			`end`
			`if sfgrd <= 0 \|\| sfgrd >= 1`
			`error( 'sfgrd is not in ( 0 , 1 )' );`
			`end`
			`else`
			`sfgrd = 0.01;`
			`end`

			`if length( varargin ) > 8`
			`MInf = varargin{ 9 };`
			`if ~ isscalar( MInf )`
			`error( 'MInf is not a real scalar' );`
			`end`
			`else`
			`MInf = - Inf;`
			`end`

			`if length( varargin ) > 9`
			`mina = varargin{ 10 };`
			`if ~ isscalar( mina )`
			`error( 'mina is not a real scalar' );`
			`end`
			`if mina < 0`
			`error( 'mina is < 0' );`
			`end`
			`else`
			`mina = 1e-16;`
			`end`

			`if Plotf > 1`
			`xlim( [ 0 MaxIter ] );`
			`ax = gca;`
			`ax.FontSize = 16;`
			`ax.Position = [ 0.03 0.07 0.95 0.92 ];`
			`ax.Toolbar.Visible = 'off';`
			`end`

			`% "global" variables- - - - - - - - - - - - - - - - - - - - - - - - - - - -`
			`% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -`

			`lastx = zeros( n , 1 ); % last point visited in the line search`
			`lastg = zeros( n , 1 ); % gradient of lastx`
			`feval = 1; % f() evaluations count ("common" with LSs)`

			`% initializations - - - - - - - - - - - - - - - - - - - - - - - - - - - - -`
			`% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -`

			`fprintf( 'Gradient method\n');`
			`if fStar > - Inf`
			`fprintf( 'feval\trel gap\t\t\|\| g(x) \|\|\trate\t');`
			`prevv = Inf;`
			`else`
			`fprintf( 'feval\tf(x)\t\t\t\|\| g(x) \|\|');`
			`end`
			`if astart > 0`
			`fprintf( '\tls feval\ta*' );`
			`end`
			`fprintf( '\n\n' );`

			`% compute first f-value and gradient in x^0 - - - - - - - - - - - - - - - -`

			`g = 0;`
			`v = f2phi( 0 );`
			`g = lastg;`

			`% compute norm of the (first) gradient- - - - - - - - - - - - - - - - - - -`

			`ng = norm( g );`
			`if eps < 0`
			`ng0 = - ng; % norm of first subgradient: why is there a "-"? ;-)`
			`else`
			`ng0 = 1; % un-scaled stopping criterion`
			`end`

			`% main loop - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -`
			`% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -`

			`while true`

			`% output statistics & plot gap/f-values - - - - - - - - - - - - - - - -`

			`if fStar > - Inf`
			`gapk = ( v - fStar ) / max( [ abs( fStar ) 1 ] );`

			`fprintf( '%4d\t%1.4e\t%1.4e' , feval , gapk , ng );`
			`if prevv < Inf`
			`fprintf( '\t%1.4e' , ( v - fStar ) / ( prevv - fStar ) );`
			`else`
			`fprintf( ' \t ' );`
			`end`
			`prevv = v;`

			`if Plotf > 1`
			`if Plotf >= 2`
			`gap( end + 1 ) = gapk;`
			`end`
			`semilogy( gap , 'Color' , 'k' , 'LineWidth' , 2 );`
			`if Plotf == 2`
			`ylim( [ 1e-15 1e+1 ] );`
			`else`
			`ylim( [ 1e-15 1e+4 ] );`
			`end`
			`%drawnow;`
			`end`
			`else`
			`fprintf( '%4d\t%1.8e\t\t%1.4e' , feval , v , ng );`

			`if Plotf > 1`
			`if Plotf >= 2`
			`gap( end + 1 ) = v;`
			`end`
			`plot( gap , 'Color' , 'k' , 'LineWidth' , 2 );`
			`%drawnow;`
			`end`
			`end`

			`% stopping criteria - - - - - - - - - - - - - - - - - - - - - - - - - -`

			`if ng <= eps * ng0`
			`status = 'optimal';`
			`fprintf( '\n' );`
			`break;`
			`end`

			`if feval > MaxFeval`
			`status = 'stopped';`
			`fprintf( '\n' );`
			`break;`
			`end`

			`% compute step size - - - - - - - - - - - - - - - - - - - - - - - - - -`

			`phip0 = - ng * ng;`

			`if astart < 0`
			`% fixed-step approach`

			`lastx = x + astart * g;`
			`[ v , lastg ] = f( lastx );`
			`feval = feval + 1;`
			`else`
			`% line-search approach, either Armijo-Wolfe or Backtracking`

			`if AWLS`
			`[ a , v ] = ArmijoWolfeLS( v , phip0 , astart , m1 , m2 , tau );`
			`else`
			`[ a , v ] = BacktrackingLS( v , phip0 , astart , m1 , tau );`
			`end`
			`end`

			`% output statistics - - - - - - - - - - - - - - - - - - - - - - - - - -`

			`if astart > 0`
			`fprintf( '\t%1.4e\n' , a );`

			`if a <= mina`
			`status = 'error';`
			`fprintf( '\n' );`
			`break;`
			`end`
			`else`
			`fprintf( '\n' );`
			`end`


			`if v <= MInf`
			`status = 'unbounded';`
			`fprintf( '\n' );`
			`break;`
			`end`

			`% compute new point - - - - - - - - - - - - - - - - - - - - - - - - - -`

			`% possibly plot the trajectory`
			`if n == 2 && Plotf == 1`
			`PXY = [ x , lastx ];`
			`line( 'XData' , PXY( 1 , : ) , 'YData' , PXY( 2 , : ) , ...`
			`'LineStyle' , '-' , 'LineWidth' , 2 , 'Marker' , 'o' , ...`
			`'Color' , [ 0 0 0 ] );`
			`end`

			`x = lastx;`

			`% update gradient - - - - - - - - - - - - - - - - - - - - - - - - - - -`

			`g = lastg;`
			`ng = norm( g );`

			`% iterate - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -`

			`if Interactive`
			`pause;`
			`end`
			`end`

			`% end of main loop- - - - - - - - - - - - - - - - - - - - - - - - - - - - -`
			`% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -`

			`% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -`
			`% inner functions - - - - - - - - - - - - - - - - - - - - - - - - - - - - -`
			`% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -`

			`function [ phi , varargout ] = f2phi( alpha )`
			`%`
			`% computes and returns the value of the tomography at alpha`
			`%`
			`% phi( alpha ) = f( x - alpha * g )`
			`%`
			`% if Plotf > 2 saves the data in gap() for plotting`
			`%`
			`% if the second output parameter is required, put there the derivative`
			`% of the tomography in alpha`
			`%`
			`% phi'( alpha ) = < \nabla f( x - alpha * g ) , - g >`
			`%`
			`% saves the point in lastx, the gradient in lastg and increases feval`

			`lastx = x - alpha * g;`
			`[ phi , lastg ] = f( lastx );`

			`if Plotf > 2`
			`if fStar > - Inf`
			`gap( end + 1 ) = ( phi - fStar ) / max( [ abs( fStar ) 1 ] );`
			`else`
			`gap( end + 1 ) = phi;`
			`end`
			`end`

			`if nargout > 1`
			`varargout{ 1 } = - g' * lastg;`
			`end`

			`feval = feval + 1;`
			`end`

			`% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -`
			`% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -`

			`function [ a , phia ] = ArmijoWolfeLS( phi0 , phip0 , as , m1 , m2 , tau )`

			`% performs an Armijo-Wolfe Line Search.`
			`%`
			`% Inputs:`
			`%`
			`% - phi0 = phi( 0 )`
			`%`
			`% - phip0 = phi'( 0 ) (< 0)`
			`%`
			`% - as (> 0) is the first value to be tested: if the Armijo condition`
			`%`
			`% phi( as ) <= phi0 + m1 * as * phip0`
			`%`
			`% is satisfied but the Wolfe condition is not, which means that the`
			`% derivative in as is still negative, which means that longer steps`
			`% might be possible), then as is divided by tau < 1 (hence it is`
			`% increased) until this does not happen any longer`
			`%`
			`% - m1 (> 0 and < 1, typically small, like 0.01) is the parameter of`
			`% the Armijo condition`
			`%`
			`% - m2 (> m1 > 0, typically large, like 0.9) is the parameter of the`
			`% Wolfe condition`
			`%`
			`% - tau (> 0 and < 1) is the increasing coefficient for the first phase`
			`% (extrapolation)`
			`%`
			`% Outputs:`
			`%`
			`% - a is the "optimal" step`
			`%`
			`% - phia = phi( a ) (the "optimal" f-value)`

			`lsiter = 1; % count iterations of first phase`
			`while feval <= MaxFeval`
			`[ phia , phips ] = f2phi( as ); % compute phi( a ) and phi'( a )`

			`if phia > phi0 + m1 * as * phip0 % Armijo not satisfied`
			`break;`
			`end`

			`if phips >= m2 * phip0 % Wolfe satisfied`
			`fprintf( ' %2d ' , lsiter );`
			`a = as;`
			`return; % Armijo + Wolfe satisfied, done`
			`end`
			`if phips >= 0 % derivative is positive, break`
			`break;`
			`end`
			`as = as / tau;`
			`lsiter = lsiter + 1;`
			`end`

			`fprintf( ' %2d ' , lsiter );`
			`lsiter = 1; % count iterations of second phase`

			`am = 0;`
			`a = as;`
			`phipm = phip0;`
			`while ( feval <= MaxFeval ) && ( ( as - am ) ) > abs( mina ) && ...`
			`( abs( phips ) > 1e-12 )`

			`if ( phipm < 0 ) && ( phips > 0 )`
			`% if the derivative in as is positive and that in am is negative,`
			`% then compute the new step by safeguarded quadratic interpolation`
			`a = ( am * phips - as * phipm ) / ( phips - phipm );`
			`a = max( [ am + ( as - am ) * sfgrd ...`
			`min( [ as - ( as - am ) * sfgrd a ] ) ] );`
			`else`
			`a = ( as - am ) / 2; % else just use dumb binary search`
			`end`

			`[ phia , phipa ] = f2phi( a ); % compute phi( a ) and phi'( a )`

			`if phia <= phi0 + m1 * as * phip0 % Armijo satisfied`
			`if phipa >= m2 * phip0 % Wolfe satisfied`
			`break; % Armijo + Wolfe satisfied, done`
			`end`

			`am = a; % Armijo is satisfied but Wolfe is not, i.e., the`
			`phipm = phipa; % derivative is still negative: move the left`
			`% endpoint of the interval to a`
			`else % Armijo not satisfied`
			`as = a; % move the right endpoint of the interval to a`
			`phips = phipa;`
			`end`

			`lsiter = lsiter + 1;`
			`end`

			`fprintf( '%2d' , lsiter );`

			`end`

			`% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -`

			`function [ as , phia ] = BacktrackingLS( phi0 , phip0 , as , m1 , tau )`

			`% performs a Backtracking Line Search.`
			`%`
			`% phi0 = phi( 0 ), phip0 = phi'( 0 ) < 0`
			`%`
			`% as > 0 is the first value to be tested, which is decreased by`
			`% multiplying it by tau < 1 until the Armijo condition with parameter`
			`% m1 is satisfied`
			`%`
			`% returns the optimal step and the optimal f-value`

			`lsiter = 1; % count ls iterations`
			`while feval <= MaxFeval && as > mina`
			`[ phia , ~ ] = f2phi( as );`
			`if phia <= phi0 + m1 * as * phip0 % Armijo satisfied`
			`break; % we are done`
			`end`
			`as = as * tau;`
			`lsiter = lsiter + 1;`
			`end`

			`fprintf( '\t%2d' , lsiter );`

			`end`

			`% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -`
			`% - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -`

			`end % the end- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -`