using LinearAlgebra, Printf, Plots function NCG(f; x::Union{Nothing, Vector}=nothing, wf::Symbol=:Fletcher_Reeves, rstart::Integer=0, eps::Real=1e-6, astart::Real=1, MaxFeval::Integer=1000, m1::Real=0.01, m2::Real=0.9, tau::Real=0.9, sfgrd::Real=0.2, MInf::Real=-Inf, mina::Real=1e-16, plt::Union{Plots.Plot, Nothing}=nothing, plotatend::Bool=true, Plotf::Integer=0, printing::Bool=true)::Tuple{AbstractArray, String} #function [ x , status ] = NCG( f , x , wf , rstart , eps , astart , # MaxFeval , m1 , m2 , tau , sfgrd , MInf , # mina ) # # Apply a Nonlinear Conjugated Gradient algorithm for the minimiztion of # the provided function f, which must have the following interface: # # [ v , g ] = f( x ) # # Input: # # - x is either a [ n x 1 ] real (column) vector denoting the input of # f(), or [] (empty). # # Output: # # - v (real, scalar): if x == [] this is the best known lower bound on # the unconstrained global optimum of f(); it can be -Inf if either f() # is not bounded below, or no such information is available. If x ~= [] # then v = f(x). # # - g (real, [ n x 1 ] real vector): this also depends on x. if x == [] # this is the standard starting point from which the algorithm should # start, otherwise it is the gradient of f() at x (or a subgradient if # f() is not differentiable at x, which it should not be if you are # applying the gradient method to it). # # The other [optional] input parameters are: # # - x (either [ n x 1 ] real vector or [], default []): starting point. # If x == [], the default starting point provided by f() is used. # # - wf (integer scalar, optional, default value 0): which of the Nonlinear # Conjugated Gradient formulae to use. Possible values are: # = 0: Fletcher-Reeves # = 1: Polak-Ribiere # = 2: Hestenes-Stiefel # = 3: Dai-Yuan # # - rstart (integer scalar, optional, default value 0): if > 0, restarts # (setting beta = 0) are performed every n * rstart iterations # # - eps (real scalar, optional, default value 1e-6): the accuracy in the # stopping criterion: the algorithm is stopped when the norm of the # gradient is less than or equal to eps. If a negative value is provided, # this is used in a *relative* stopping criterion: the algorithm is # stopped when the norm of the gradient is less than or equal to # (- eps) * || norm of the first gradient ||. # # - astart (real scalar, optional, default value 1): starting value of # alpha in the line search (> 0) # # - MaxFeval (integer scalar, optional, default value 1000): the maximum # number of function evaluations (hence, iterations will be not more than # MaxFeval because at each iteration at least a function evaluation is # performed, possibly more due to the line search). # # - m1 (real scalar, optional, default value 0.01): first parameter of the # Armijo-Wolfe-type line search (sufficient decrease). Has to be in (0,1) # # - m2 (real scalar, optional, default value 0.9): typically the second # parameter of the Armijo-Wolfe-type line search (strong curvature # condition). It should to be in (0,1); if not, it is taken to mean that # the simpler Backtracking line search should be used instead # # - tau (real scalar, optional, default value 0.9): scaling parameter for # the line search. In the Armijo-Wolfe line search it is used in the # first phase: if the derivative is not positive, then the step is # divided by tau (which is < 1, hence it is increased). In the # Backtracking line search, each time the step is multiplied by tau # (hence it is decreased). # # - sfgrd (real scalar, optional, default value 0.2): safeguard parameter # for the line search. to avoid numerical problems that can occur with # the quadratic interpolation if the derivative at one endpoint is too # large w.r.t. the one at the other (which leads to choosing a point # extremely near to the other endpoint), a *safeguarded* version of # interpolation is used whereby the new point is chosen in the interval # [ as * ( 1 + sfgrd ) , am * ( 1 - sfgrd ) ], being [ as , am ] the # current interval, whatever quadratic interpolation says. If you # experiemce problems with the line search taking too many iterations to # converge at "nasty" points, try to increase this # # - MInf (real scalar, optional, default value -Inf): if the algorithm # determines a value for f() <= MInf this is taken as an indication that # the problem is unbounded below and computation is stopped # (a "finite -Inf"). # # - mina (real scalar, optional, default value 1e-16): if the algorithm # determines a stepsize value <= mina, this is taken as an indication # that something has gone wrong (the gradient is not a direction of # descent, so maybe the function is not differentiable) and computation # is stopped. It is legal to take mina = 0, thereby in fact skipping this # test. # # Output: # # - x ([ n x 1 ] real column vector): the best solution found so far. # # - status (string): a string describing the status of the algorithm at # termination # # = 'optimal': the algorithm terminated having proven that x is a(n # approximately) optimal solution, i.e., the norm of the gradient at x # is less than the required threshold # # = 'unbounded': the algorithm has determined an extrenely large negative # value for f() that is taken as an indication that the problem is # unbounded below (a "finite -Inf", see MInf above) # # = 'stopped': the algorithm terminated having exhausted the maximum # number of iterations: x is the bast solution found so far, but not # necessarily the optimal one # # = 'error': the algorithm found a numerical error that prevents it from # continuing optimization (see mina above) # #{ # ======================================= # Author: Antonio Frangioni # Date: 10-11-22 # Version 1.21 # Copyright Antonio Frangioni # ======================================= #} # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # inner functions - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - function f2phi(alpha, derivate=false) # # computes and returns the value of the tomography at alpha # # phi( alpha ) = f( x + alpha * d ) # # if Plotf > 2 saves the data in gap() for plotting # # if the second output parameter is required, put there the derivative # of the tomography in alpha # # phi'( alpha ) = < \nabla f( x + alpha * d ) , d > # # saves the point in lastx, the gradient in lastg and increases feval lastx = x + alpha * d (phi, lastg, _) = f(lastx) if Plotf > 2 if fStar > - Inf push!(gap, (phi - fStar) / max(abs(fStar), 1)) else push!(gap, phi) end end feval += 1 if derivate return (phi, dot(d, lastg)) end return (phi, nothing) end # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - function ArmijoWolfeLS(phi0, phip0, as, m1, m2, tau) # performs an Armijo-Wolfe Line Search. # # phi0 = phi( 0 ), phip0 = phi'( 0 ) < 0 # # as > 0 is the first value to be tested: if phi'( as ) < 0 then as is # divided by tau < 1 (hence it is increased) until this does not happen # any longer # # m1 and m2 are the standard Armijo-Wolfe parameters; note that the strong # Wolfe condition is used # # returns the optimal step and the optimal f-value lsiter = 1 # count iterations of first phase local phips, phia while feval ≤ MaxFeval phia, phips = f2phi(as, true) if (phia ≤ phi0 + m1 * as * phip0) && (abs(phips) ≤ - m2 * phip0) if printing @printf("\t%2d", lsiter) end a = as return (a, phia) # Armijo + strong Wolfe satisfied, we are done end if phips ≥ 0 break end as = as / tau lsiter += 1 end if printing @printf("\t%2d ", lsiter) end lsiter = 1 # count iterations of second phase am = 0 a = as phipm = phip0 while (feval ≤ MaxFeval ) && (as - am) > mina && (phips > 1e-12) # compute the new value by safeguarded quadratic interpolation a = (am * phips - as * phipm) / (phips - phipm) a = max(am + ( as - am ) * sfgrd, min(as - ( as - am ) * sfgrd, a)) # compute phi(a) phia, phip = f2phi(a, true) if (phia ≤ phi0 + m1 * a * phip0) && (abs(phip) ≤ -m2 * phip0) break # Armijo + strong Wolfe satisfied, we are done end # restrict the interval based on sign of the derivative in a if phip < 0 am = a phipm = phip else as = a if as ≤ mina break end phips = phip end lsiter += 1 end if printing @printf("%2d", lsiter) end return (a, phia) end # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - function BacktrackingLS( phi0 , phip0 , as , m1 , tau ) # performs a Backtracking Line Search. # # phi0 = phi( 0 ), phip0 = phi'( 0 ) < 0 # # as > 0 is the first value to be tested, which is decreased by # multiplying it by tau < 1 until the Armijo condition with parameter # m1 is satisfied # # returns the optimal step and the optimal f-value lsiter = 1 # count ls iterations while feval ≤ MaxFeval && as > mina phia, _ = f2phi(as) if phia ≤ phi0 + m1 * as * phip0 # Armijo satisfied break # we are done end as *= tau lsiter += 1 end if printing @printf("\t%2d", lsiter) end return (as, phia) end # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Plotf = 1; # 1 = the level sets of f and the trajectory are plotted (when n = 2) # 2 = the function value / gap are plotted, iteration-wise # 3 = the function value / gap are plotted, function-evaluation-wise # all the rest: nothing is plotted Interactive = false # if we pause at every iteration local gap PXY = Matrix{Real}(undef, 2, 0) status = "error" if Plotf > 1 if Plotf == 2 MaxIter = 50 # expected number of iterations for the gap plot else MaxIter = 70 # expected number of iterations for the gap plot end gap = [] end # reading and checking input- - - - - - - - - - - - - - - - - - - - - - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if isnothing(x) (fStar, x, _) = f(nothing) else (Star, _, _) = f(nothing) end n = size(x, 1) if wf != :Fletcher_Reeves && wf != :Polak_Ribiere && wf != :Hestenes_Stiefel && wf != :Dai_Yuan error("unknown NCG formula $wf") end if astart ≤ 0 error("astart must be > 0") end if m1 ≤ 0 || m1 ≥ 1 error("m1 is not in (0 ,1)") end AWLS = (m2 > 0 && m2 < 1) if tau ≤ 0 || tau ≥ 1 error("tau is not in (0 ,1)") end if sfgrd ≤ 0 || sfgrd ≥ 1 error("sfgrd is not in (0, 1)") end if mina < 0 error("mina is < 0") end # "global" variables- - - - - - - - - - - - - - - - - - - - - - - - - - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - lastx = zeros(n) # last point visited in the line search lastg = zeros(n) # gradient of lastx d = zeros(n) # NGC's direction feval = 0 # f() evaluations count ("common" with LSs) # initializations - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if printing @printf("NCG method ") if wf == :Fletcher_Reeves @printf("(Fletcher-Reeves)\n") elseif wf == :Polak_Ribiere @printf("(Polak-Ribiere)\n") elseif wf == :Hestenes_Stiefel @printf("(Hestenes-Stiefel)\n") elseif wf == :Dai_Yuan @printf("(Dai-Yuan)\n") end if fStar > - Inf @printf("feval\trel gap") else @printf("feval\tf(x)") end @printf("\t\t|| g(x) ||\tbeta\tls feval\ta*\n\n") end if Plotf > 1 && isnothing(plt) if Plotf == 2 plt = plot(xlims=(0, MaxIter), ylims=(1e-15, 1e+1), yscale=:log10) else plt = plot(xlims=(0, MaxIter), ylims=(1e-15, 1e+4), yscale=:log10) end elseif isnothing(plt) plt = plot() end v, _ = f2phi(0) ng = norm(lastg) if eps < 0 ng0 = -ng # norm of first subgradient: why is there a "-"? ;-) else ng0 = 1 # un-scaled stopping criterion end iter = 1 # iterations count (as distinguished from f() evals) local pastg, pastd # main loop - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - while true # output statistics - - - - - - - - - - - - - - - - - - - - - - - - - - if fStar > -Inf gapk = ( v - fStar ) / max(abs(fStar), 1) if printing @printf("%4d\t%1.4e\t%1.4e", feval, gapk, ng) end if Plotf > 1 if Plotf == 2 push!(gap, gapk) end end else if printing @printf("%4d\t%1.8e\t\t%1.4e", feval, v, ng) end if Plotf > 1 if Plotf == 2 push!(gap, v) end end end # stopping criteria - - - - - - - - - - - - - - - - - - - - - - - - - - if ng ≤ eps * ng0 status = "optimal" break end if feval > MaxFeval status = "stopped" break end # compute search direction- - - - - - - - - - - - - - - - - - - - - - - # formulae could be streamlined somewhat and some norms could be saved # from previous iterations if iter == 1 # first iteration is off-line, standard gradient d = -lastg if printing @printf("\t") end else # normal iterations, use appropriate NCG formula if rstart > 0 && mod( iter , n * rstart ) == 0 # ... unless a restart is being prformed beta = 0 if printing @printf("\t(res)") end else if wf == :Fletcher_Reeves beta = (ng / norm(pastg))^2 elseif wf == :Polak_Ribiere beta = (dot(lastg, (lastg - pastg))) / norm(pastg)^2 beta = max(beta, 0) elseif wf == :Hestenes_Stiefel beta = (dot(lastg, (lastg - pastg))) / (dot((lastg - pastg), pastd)) if beta < 0 beta = 0 end elseif wf == :Dai_Yuan beta = ng^2 / (dot((lastg - pastg), pastd)) end if printing @printf("\t%1.4f", beta) end end if beta != 0 d = -lastg + beta * pastd else d = -lastg end end pastg = lastg # previous gradient pastd = d # previous search direction # compute step size - - - - - - - - - - - - - - - - - - - - - - - - - - phip0 = dot(lastg, d) if AWLS (a, v) = ArmijoWolfeLS(v, phip0, astart, m1, m2, tau) else (a, v) = BacktrackingLS(v, phip0, astart, m1, tau) end # output statistics - - - - - - - - - - - - - - - - - - - - - - - - - - if printing @printf("\t%1.2e\n", a) end if a ≤ mina status = "error" break end if v <= MInf status = "unbounded" break end # compute new point - - - - - - - - - - - - - - - - - - - - - - - - - - # possibly plot the trajectory if n == 2 && Plotf == 1 PXY = hcat(PXY, hcat(x, lastx)) end x = lastx ng = norm(lastg) # iterate - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - iter += 1 if Interactive readline() end end # end of main loop- - - - - - - - - - - - - - - - - - - - - - - - - - - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if Plotf ≥ 2 plot!(plt, gap) elseif Plotf == 1 && n == 2 plot!(plt, PXY[1, :], PXY[2, :]) end if plotatend display(plt) end return (x, status) end # the end- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -