Lesson 23/11 with methods from previous lessons

Author: elvis
Date: 2023-11-30 15:24:44 +01:00
Parent: 65c8842e83
Commit: c828453e94
4 changed files with 4793 additions and 0 deletions

11-23/HBG.jl Normal file

@@ -0,0 +1,266 @@
using LinearAlgebra, Printf, Plots
function HBG(f;
x::Union{Nothing, Vector}=nothing,
alpha::Real=1,
beta::Real=0.9,
eps::Real=1e-6,
MaxIter::Integer=300,
MInf::Real=-Inf,
plt::Union{Plots.Plot, Nothing}=nothing,
plotatend::Bool=true,
Plotf::Integer=0,
printing::Bool=true)::Tuple{AbstractArray, String}
#function ( x , status ) = HBG( f ; x , alpha , beta , eps , MaxIter ,
#                               MInf , plt , plotatend , Plotf , printing )
#
# Apply a Heavy Ball Gradient approach for the minimization of the
# provided function f, which must have the following interface:
#
# ( v , g , _ ) = f( x )
#
# Input:
#
# - x is either a [ n x 1 ] real (column) vector denoting the input of
# f(), or nothing.
#
# Output:
#
# - v (real, scalar): if x === nothing this is the best known lower bound
# on the unconstrained global optimum of f(); it can be -Inf if either f()
# is not bounded below, or no such information is available. If
# x !== nothing then v = f(x).
#
# - g (real, [ n x 1 ] real vector): this also depends on x. If x ===
# nothing this is the standard starting point from which the algorithm
# should start, otherwise it is the gradient of f() at x (or a subgradient
# if f() is not differentiable at x, which it should not be if you are
# applying the gradient method to it).
#
# The other [optional] input parameters are:
#
# - x (either [ n x 1 ] real vector or nothing, default nothing): starting
# point. If x === nothing, the default starting point provided by f() is
# used.
#
# - alpha (real scalar, optional, default value 1): the fixed stepsize of
# the Heavy Ball Gradient approach (along the anti-gradient).
#
# - beta (real scalar, optional, default value 0.9): the fixed weight of
# the momentum term
#
# beta * ( x^i - x^{i - 1} )
#
# Note that beta has to be >= 0, although 0 is accepted which turns the
# Heavy Ball Gradient approach into a "Light" Ball Gradient approach,
# i.e., a standard Gradient approach with fixed stepsize.
#
# - eps (real scalar, optional, default value 1e-6): the accuracy in the
# stopping criterion: the algorithm is stopped when the norm of the
# gradient is less than or equal to eps. If a negative value is provided,
# this is used in a *relative* stopping criterion: the algorithm is
# stopped when the norm of the gradient is less than or equal to
# ( - eps ) * ( norm of the gradient at the starting point ).
#
# - MaxIter (integer scalar, optional, default value 300): the maximum
# number of iterations == function evaluations.
#
# - MInf (real scalar, optional, default value -Inf): if the algorithm
# determines a value for f() <= MInf this is taken as an indication that
# the problem is unbounded below and computation is stopped
# (a "finite -Inf").
#
# Output:
#
# - x ([ n x 1 ] real column vector): the best solution found so far.
#
# - status (string): a string describing the status of the algorithm at
# termination
#
# = 'optimal': the algorithm terminated having proven that x is a(n
# approximately) optimal solution, i.e., the norm of the gradient at x
# is less than the required threshold
#
# = 'unbounded': the algorithm has determined an extremely large negative
# value for f() that is taken as an indication that the problem is
# unbounded below (a "finite -Inf", see MInf above)
#
# = 'stopped': the algorithm terminated having exhausted the maximum
# number of iterations: x is the best solution found so far, but not
# necessarily the optimal one
#
# = 'error': the algorithm encountered a numerical error that prevents it
# from continuing the optimization
#
#{
# =======================================
# Author: Antonio Frangioni
# Date: 10-11-22
# Version 1.01
# Copyright Antonio Frangioni
# =======================================
#}
# Plotf values:
# 0 = nothing is plotted
# 1 = the level sets of f and the trajectory are plotted (when n = 2)
# 2 = the function value / gap are plotted
local gap
if Plotf == 2
gap = []
end
PXY = Matrix{Real}(undef, 2, 0)
Interactive = false # if we pause at every iteration
# reading and checking input- - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if isnothing(x)
(fStar, x, _) = f(nothing)
else
(fStar, _, _) = f(nothing)
end
n = size(x, 1)
if alpha <= 0
error("alpha must be positive")
end
if beta < 0
error("beta must be non-negative")
end
# initializations - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if printing
@printf("Heavy Ball Gradient method\n")
if fStar > -Inf
@printf("feval\trel gap\t\tbest gap")
else
@printf("feval\tf(x)\tfbest")
end
@printf("\t|| g(x) ||\n\n")
end
if Plotf == 2 && isnothing(plt)
plt = plot(xlims=(0, MaxIter))
elseif isnothing(plt)
plt = plot()
end
(v, g, _) = f(x)
ng = norm(g)
vbest = v
local ng0
if eps < 0
ng0 = -ng # norm of first subgradient: why is there a "-"? ;-)
else
ng0 = 1 # un-scaled stopping criterion
end
pastd = zeros(n) # the direction at the previous iteration
feval = 1 # f() evaluations count
status = "error"
# main loop - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
while true
# output statistics - - - - - - - - - - - - - - - - - - - - - - - - - -
if fStar > -Inf
gapk = (v - fStar)/max(abs(fStar), 1)
bstgapk = (vbest - fStar)/max(abs(fStar), 1)
if printing
@printf("%4d\t%1.4e\t%1.4e\t%1.4e\n", feval, gapk, bstgapk, ng)
end
if Plotf == 2
push!(gap, gapk)
end
else
if printing
@printf("%4d\t%1.8e\t%1.8e\t\t%1.4e\n", feval, v, vbest, ng)
end
if Plotf == 2
push!(gap, v)
end
end
# stopping criteria - - - - - - - - - - - - - - - - - - - - - - - - - -
if ng <= eps * ng0
status = "optimal"
break
end
if feval > MaxIter
status = "stopped"
break
end
if v <= MInf
status = "unbounded"
break
end
# compute deflected gradient direction- - - - - - - - - - - - - - - - -
d = -alpha * g .+ beta * pastd
# compute new point - - - - - - - - - - - - - - - - - - - - - - - - - -
# possibly plot the trajectory
if n == 2 && Plotf == 1
PXY = hcat(PXY, hcat(x, x + d))
end
x += d
pastd .= d
# compute f() - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
(v, g, _) = f(x)
ng = norm(g)
if v < vbest
vbest = v
end
feval += 1
# iterate - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if Interactive
l = readline()
if l == "exit"
break
end
end
end
# end of main loop- - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# inner functions - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if plotatend
if Plotf == 2
plot!(plt, gap)
elseif Plotf == 1 && n == 2
plot!(plt, PXY[1, :], PXY[2, :])
end
display(plt)
end
return (x, status)
end # the end- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
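# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Usage sketch (an illustrative addition, not part of the original course
# code): a 2-D convex quadratic wrapped in the three-output interface that
# HBG() expects; hbg_quadf, its Hessian and its starting point are all
# assumed for the example.
function hbg_quadf(x::Union{Nothing, Vector})
    Q = [6.0 -2.0; -2.0 6.0] # symmetric positive definite, so f* = 0 at the origin
    if isnothing(x)
        return (0.0, [10.0, -5.0], nothing) # (lower bound, default start, unused)
    end
    return (dot(x, Q * x) / 2, Q * x, nothing) # (value, gradient, unused)
end
# (xopt, status) = HBG(hbg_quadf, alpha=0.05, beta=0.9, plotatend=false)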

11-23/NCG.jl Normal file

@@ -0,0 +1,555 @@
using LinearAlgebra, Printf, Plots
function NCG(f;
x::Union{Nothing, Vector}=nothing,
wf::Symbol=:Fletcher_Reeves,
rstart::Integer=0,
eps::Real=1e-6,
astart::Real=1,
MaxFeval::Integer=1000,
m1::Real=0.01,
m2::Real=0.9,
tau::Real=0.9,
sfgrd::Real=0.2,
MInf::Real=-Inf,
mina::Real=1e-16,
plt::Union{Plots.Plot, Nothing}=nothing,
plotatend::Bool=true,
Plotf::Integer=0,
printing::Bool=true)::Tuple{AbstractArray, String}
#function ( x , status ) = NCG( f ; x , wf , rstart , eps , astart ,
#                               MaxFeval , m1 , m2 , tau , sfgrd , MInf ,
#                               mina , plt , plotatend , Plotf , printing )
#
# Apply a Nonlinear Conjugate Gradient algorithm for the minimization of
# the provided function f, which must have the following interface:
#
# ( v , g , _ ) = f( x )
#
# Input:
#
# - x is either a [ n x 1 ] real (column) vector denoting the input of
# f(), or nothing.
#
# Output:
#
# - v (real, scalar): if x === nothing this is the best known lower bound
# on the unconstrained global optimum of f(); it can be -Inf if either f()
# is not bounded below, or no such information is available. If
# x !== nothing then v = f(x).
#
# - g (real, [ n x 1 ] real vector): this also depends on x. If x ===
# nothing this is the standard starting point from which the algorithm
# should start, otherwise it is the gradient of f() at x (or a subgradient
# if f() is not differentiable at x, which it should not be if you are
# applying the gradient method to it).
#
# The other [optional] input parameters are:
#
# - x (either [ n x 1 ] real vector or nothing, default nothing): starting
# point. If x === nothing, the default starting point provided by f() is
# used.
#
# - wf (Symbol, optional, default value :Fletcher_Reeves): which of the
# Nonlinear Conjugate Gradient formulae to use. Possible values are:
# = :Fletcher_Reeves
# = :Polak_Ribiere
# = :Hestenes_Stiefel
# = :Dai_Yuan
#
# - rstart (integer scalar, optional, default value 0): if > 0, restarts
# (setting beta = 0) are performed every n * rstart iterations
#
# - eps (real scalar, optional, default value 1e-6): the accuracy in the
# stopping criterion: the algorithm is stopped when the norm of the
# gradient is less than or equal to eps. If a negative value is provided,
# this is used in a *relative* stopping criterion: the algorithm is
# stopped when the norm of the gradient is less than or equal to
# ( - eps ) * ( norm of the gradient at the starting point ).
#
# - astart (real scalar, optional, default value 1): starting value of
# alpha in the line search (> 0)
#
# - MaxFeval (integer scalar, optional, default value 1000): the maximum
# number of function evaluations (hence, iterations will be not more than
# MaxFeval because at each iteration at least a function evaluation is
# performed, possibly more due to the line search).
#
# - m1 (real scalar, optional, default value 0.01): first parameter of the
# Armijo-Wolfe-type line search (sufficient decrease). Has to be in (0,1)
#
# - m2 (real scalar, optional, default value 0.9): typically the second
# parameter of the Armijo-Wolfe-type line search (strong curvature
# condition). It should be in (0,1); if not, it is taken to mean that
# the simpler Backtracking line search should be used instead
#
# - tau (real scalar, optional, default value 0.9): scaling parameter for
# the line search. In the Armijo-Wolfe line search it is used in the
# first phase: if the derivative is not positive, then the step is
# divided by tau (which is < 1, hence it is increased). In the
# Backtracking line search, each time the step is multiplied by tau
# (hence it is decreased).
#
# - sfgrd (real scalar, optional, default value 0.2): safeguard parameter
# for the line search. to avoid numerical problems that can occur with
# the quadratic interpolation if the derivative at one endpoint is too
# large w.r.t. the one at the other (which leads to choosing a point
# extremely near to the other endpoint), a *safeguarded* version of
# interpolation is used whereby the new point is chosen in the interval
# [ am + ( as - am ) * sfgrd , as - ( as - am ) * sfgrd ], being [ am , as ] the
# current interval, whatever quadratic interpolation says. If you
# experience problems with the line search taking too many iterations to
# converge at "nasty" points, try to increase this
#
# - MInf (real scalar, optional, default value -Inf): if the algorithm
# determines a value for f() <= MInf this is taken as an indication that
# the problem is unbounded below and computation is stopped
# (a "finite -Inf").
#
# - mina (real scalar, optional, default value 1e-16): if the algorithm
# determines a stepsize value <= mina, this is taken as an indication
# that something has gone wrong (the gradient is not a direction of
# descent, so maybe the function is not differentiable) and computation
# is stopped. It is legal to take mina = 0, thereby in fact skipping this
# test.
#
# Output:
#
# - x ([ n x 1 ] real column vector): the best solution found so far.
#
# - status (string): a string describing the status of the algorithm at
# termination
#
# = 'optimal': the algorithm terminated having proven that x is a(n
# approximately) optimal solution, i.e., the norm of the gradient at x
# is less than the required threshold
#
# = 'unbounded': the algorithm has determined an extremely large negative
# value for f() that is taken as an indication that the problem is
# unbounded below (a "finite -Inf", see MInf above)
#
# = 'stopped': the algorithm terminated having exhausted the maximum
# number of iterations: x is the best solution found so far, but not
# necessarily the optimal one
#
# = 'error': the algorithm encountered a numerical error that prevents it
# from continuing the optimization (see mina above)
#
#{
# =======================================
# Author: Antonio Frangioni
# Date: 10-11-22
# Version 1.21
# Copyright Antonio Frangioni
# =======================================
#}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# inner functions - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
function f2phi(alpha, derivate=false)
#
# computes and returns the value of the tomography at alpha
#
# phi( alpha ) = f( x + alpha * d )
#
# if Plotf > 2 saves the data in gap() for plotting
#
# if the second output parameter is required, put there the derivative
# of the tomography in alpha
#
# phi'( alpha ) = < \nabla f( x + alpha * d ) , d >
#
# saves the point in lastx, the gradient in lastg and increases feval
lastx = x + alpha * d
(phi, lastg, _) = f(lastx)
if Plotf > 2
if fStar > - Inf
push!(gap, (phi - fStar) / max(abs(fStar), 1))
else
push!(gap, phi)
end
end
feval += 1
if derivate
return (phi, dot(d, lastg))
end
return (phi, nothing)
end
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
function ArmijoWolfeLS(phi0, phip0, as, m1, m2, tau)
# performs an Armijo-Wolfe Line Search.
#
# phi0 = phi( 0 ), phip0 = phi'( 0 ) < 0
#
# as > 0 is the first value to be tested: if phi'( as ) < 0 then as is
# divided by tau < 1 (hence it is increased) until this does not happen
# any longer
#
# m1 and m2 are the standard Armijo-Wolfe parameters; note that the strong
# Wolfe condition is used
#
# returns the optimal step and the optimal f-value
lsiter = 1 # count iterations of first phase
local phips, phia
while feval <= MaxFeval
phia, phips = f2phi(as, true)
if (phia <= phi0 + m1 * as * phip0) && (abs(phips) <= -m2 * phip0)
if printing
@printf("\t%2d", lsiter)
end
a = as
return (a, phia) # Armijo + strong Wolfe satisfied, we are done
end
if phips >= 0
break
end
as = as / tau
lsiter += 1
end
if printing
@printf("\t%2d ", lsiter)
end
lsiter = 1 # count iterations of second phase
am = 0
a = as
phipm = phip0
while (feval <= MaxFeval) && ((as - am) > mina) && (phips > 1e-12)
# compute the new value by safeguarded quadratic interpolation
a = (am * phips - as * phipm) / (phips - phipm)
a = max(am + ( as - am ) * sfgrd, min(as - ( as - am ) * sfgrd, a))
# compute phi(a)
phia, phip = f2phi(a, true)
if (phia <= phi0 + m1 * a * phip0) && (abs(phip) <= -m2 * phip0)
break # Armijo + strong Wolfe satisfied, we are done
end
# restrict the interval based on sign of the derivative in a
if phip < 0
am = a
phipm = phip
else
as = a
if as <= mina
break
end
phips = phip
end
lsiter += 1
end
if printing
@printf("%2d", lsiter)
end
return (a, phia)
end
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
function BacktrackingLS( phi0 , phip0 , as , m1 , tau )
# performs a Backtracking Line Search.
#
# phi0 = phi( 0 ), phip0 = phi'( 0 ) < 0
#
# as > 0 is the first value to be tested, which is decreased by
# multiplying it by tau < 1 until the Armijo condition with parameter
# m1 is satisfied
#
# returns the optimal step and the optimal f-value
local phia = phi0 # fallback value in case the loop body never runs
lsiter = 1 # count ls iterations
while feval <= MaxFeval && as > mina
phia, _ = f2phi(as)
if phia <= phi0 + m1 * as * phip0 # Armijo satisfied
break # we are done
end
as *= tau
lsiter += 1
end
if printing
@printf("\t%2d", lsiter)
end
return (as, phia)
end
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Plotf = 1;
# 1 = the level sets of f and the trajectory are plotted (when n = 2)
# 2 = the function value / gap are plotted, iteration-wise
# 3 = the function value / gap are plotted, function-evaluation-wise
# all the rest: nothing is plotted
Interactive = false # if we pause at every iteration
local gap
PXY = Matrix{Real}(undef, 2, 0)
status = "error"
if Plotf > 1
if Plotf == 2
MaxIter = 50 # expected number of iterations for the gap plot
else
MaxIter = 70 # expected number of iterations for the gap plot
end
gap = []
end
# reading and checking input- - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if isnothing(x)
(fStar, x, _) = f(nothing)
else
(fStar, _, _) = f(nothing)
end
n = size(x, 1)
if wf != :Fletcher_Reeves && wf != :Polak_Ribiere && wf != :Hestenes_Stiefel &&
wf != :Dai_Yuan
error("unknown NCG formula $wf")
end
if astart <= 0
error("astart must be > 0")
end
if m1 <= 0 || m1 >= 1
error("m1 is not in (0, 1)")
end
AWLS = (m2 > 0 && m2 < 1)
if tau <= 0 || tau >= 1
error("tau is not in (0, 1)")
end
if sfgrd <= 0 || sfgrd >= 1
error("sfgrd is not in (0, 1)")
end
if mina < 0
error("mina is < 0")
end
# "global" variables- - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
lastx = zeros(n) # last point visited in the line search
lastg = zeros(n) # gradient of lastx
d = zeros(n) # NCG's direction
feval = 0 # f() evaluations count ("common" with LSs)
# initializations - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if printing
@printf("NCG method ")
if wf == :Fletcher_Reeves
@printf("(Fletcher-Reeves)\n")
elseif wf == :Polak_Ribiere
@printf("(Polak-Ribiere)\n")
elseif wf == :Hestenes_Stiefel
@printf("(Hestenes-Stiefel)\n")
elseif wf == :Dai_Yuan
@printf("(Dai-Yuan)\n")
end
if fStar > - Inf
@printf("feval\trel gap")
else
@printf("feval\tf(x)")
end
@printf("\t\t|| g(x) ||\tbeta\tls feval\ta*\n\n")
end
if Plotf > 1 && isnothing(plt)
if Plotf == 2
plt = plot(xlims=(0, MaxIter), ylims=(1e-15, 1e+1), yscale=:log10)
else
plt = plot(xlims=(0, MaxIter), ylims=(1e-15, 1e+4), yscale=:log10)
end
elseif isnothing(plt)
plt = plot()
end
v, _ = f2phi(0)
ng = norm(lastg)
if eps < 0
ng0 = -ng # norm of first subgradient: why is there a "-"? ;-)
else
ng0 = 1 # un-scaled stopping criterion
end
iter = 1 # iterations count (as distinguished from f() evals)
local pastg, pastd
# main loop - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
while true
# output statistics - - - - - - - - - - - - - - - - - - - - - - - - - -
if fStar > -Inf
gapk = ( v - fStar ) / max(abs(fStar), 1)
if printing
@printf("%4d\t%1.4e\t%1.4e", feval, gapk, ng)
end
if Plotf > 1
if Plotf == 2
push!(gap, gapk)
end
end
else
if printing
@printf("%4d\t%1.8e\t\t%1.4e", feval, v, ng)
end
if Plotf > 1
if Plotf == 2
push!(gap, v)
end
end
end
# stopping criteria - - - - - - - - - - - - - - - - - - - - - - - - - -
if ng <= eps * ng0
status = "optimal"
break
end
if feval > MaxFeval
status = "stopped"
break
end
# compute search direction- - - - - - - - - - - - - - - - - - - - - - -
# formulae could be streamlined somewhat and some norms could be saved
# from previous iterations
if iter == 1 # first iteration is off-line, standard gradient
d = -lastg
if printing
@printf("\t")
end
else # normal iterations, use appropriate NCG formula
if rstart > 0 && mod( iter , n * rstart ) == 0
# ... unless a restart is being performed
beta = 0
if printing
@printf("\t(res)")
end
else
if wf == :Fletcher_Reeves
beta = (ng / norm(pastg))^2
elseif wf == :Polak_Ribiere
beta = (dot(lastg, (lastg - pastg))) / norm(pastg)^2
beta = max(beta, 0)
elseif wf == :Hestenes_Stiefel
beta = (dot(lastg, (lastg - pastg))) / (dot((lastg - pastg), pastd))
if beta < 0
beta = 0
end
elseif wf == :Dai_Yuan
beta = ng^2 / (dot((lastg - pastg), pastd))
end
if printing
@printf("\t%1.4f", beta)
end
end
if beta != 0
d = -lastg + beta * pastd
else
d = -lastg
end
end
pastg = lastg # previous gradient
pastd = d # previous search direction
# compute step size - - - - - - - - - - - - - - - - - - - - - - - - - -
phip0 = dot(lastg, d)
if AWLS
(a, v) = ArmijoWolfeLS(v, phip0, astart, m1, m2, tau)
else
(a, v) = BacktrackingLS(v, phip0, astart, m1, tau)
end
# output statistics - - - - - - - - - - - - - - - - - - - - - - - - - -
if printing
@printf("\t%1.2e\n", a)
end
if a <= mina
status = "error"
break
end
if v <= MInf
status = "unbounded"
break
end
# compute new point - - - - - - - - - - - - - - - - - - - - - - - - - -
# possibly plot the trajectory
if n == 2 && Plotf == 1
PXY = hcat(PXY, hcat(x, lastx))
end
x = lastx
ng = norm(lastg)
# iterate - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
iter += 1
if Interactive
readline()
end
end
# end of main loop- - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if Plotf > 1
plot!(plt, gap)
elseif Plotf == 1 && n == 2
plot!(plt, PXY[1, :], PXY[2, :])
end
if plotatend
display(plt)
end
return (x, status)
end # the end- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
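# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Usage sketch (an illustrative addition, not part of the original course
# code): the same three-output interface minimized with the Polak-Ribiere
# formula; ncg_testf and all its constants are assumed for the example.
function ncg_testf(x::Union{Nothing, Vector})
    Q = [3.0 1.0; 1.0 5.0] # symmetric positive definite
    q = [-1.0, 2.0]
    if isnothing(x)
        return (-Inf, [5.0, 5.0], nothing) # no known lower bound, start at (5, 5)
    end
    return (dot(x, Q * x) / 2 + dot(q, x), Q * x + q, nothing)
end
# (xopt, status) = NCG(ncg_testf, wf=:Polak_Ribiere, eps=1e-8, plotatend=false)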

11-23/SGM.jl Normal file

@@ -0,0 +1,339 @@
using LinearAlgebra, Printf, Plots
function SGM(f;
x::Union{Nothing, Vector}=nothing,
eps::Real=1e-6,
astart::Real=1e-4,
tau::Real=0.96,
MaxFeval::Integer=300,
MInf::Real=-Inf,
mina::Real=1e-16,
plt::Union{Plots.Plot, Nothing}=nothing,
plotatend::Bool=true,
Plotf::Integer=0,
printing::Bool=true)::Tuple{AbstractArray, String}
# function ( x , status ) = SGM( f ; x , eps , astart , tau , MaxFeval ,
#                                MInf , mina , plt , plotatend , Plotf , printing )
#
# Apply the classical Subgradient Method for the minimization of the
# provided function f, which must have the following interface:
#
# ( v , g , _ ) = f( x )
#
# Input:
#
# - x is either a [ n x 1 ] real (column) vector denoting the input of
# f(), or nothing.
#
# Output:
#
# - v (real, scalar): if x === nothing this is the best known lower bound
# on the unconstrained global optimum of f(); it can be -Inf if either f()
# is not bounded below, or no such information is available. If
# x !== nothing then v = f(x).
#
# - g (real, [ n x 1 ] real vector): this also depends on x. If x ===
# nothing this is the standard starting point from which the algorithm
# should start, otherwise it is a subgradient of f() at x (possibly the
# gradient, but you should not apply this algorithm to a differentiable
# f)
#
# The other [optional] input parameters are:
#
# - x (either [ n x 1 ] real vector or nothing, default nothing): starting
# point. If x === nothing, the default starting point provided by f() is
# used.
#
# - eps (real scalar, optional, default value 1e-6): the accuracy in the
# stopping criterion. If eps > 0, then a target-level Polyak stepsize
# with nonvanishing threshold is used, and eps is taken as the minimum
# *relative* value for the displacement, i.e.,
#
# delta^i >= eps * max( abs( f( x^i ) ) , 1 )
#
# is used as the minimum value for the displacement. If eps < 0 and
# v_* = f( nothing ) > -Inf, then the algorithm "cheats" and it does an
# *exact* Polyak stepsize with termination criterion
#
# ( f^i_{ref} - v_* ) <= ( - eps ) * max( abs( v_* ) , 1 )
#
# Finally, if eps == 0 the algorithm rather uses a DSS (diminishing
# square-summable) stepsize, i.e., astart * ( 1 / i ) [see below]
#
# - astart (real scalar, optional, default value 1e-4): if eps > 0, i.e.,
# a target-level Polyak stepsize with nonvanishing threshold is used,
# then astart is used as the relative value to which the displacement is
# reset each time f( x^{i + 1} ) <= f^i_{ref} - delta^i, i.e.,
#
# delta^{i + 1} = astart * max( abs( f^{i + 1}_{ref} ) , 1 )
#
# If eps == 0, i.e., a diminishing square-summable stepsize is used, then
# astart is used as the fixed scaling factor for the stepsize sequence
# astart * ( 1 / i ).
#
# - tau (real scalar, optional, default value 0.96): if eps > 0, i.e.,
# a target-level Polyak stepsize with nonvanishing threshold is used,
# then delta^{i + 1} = delta^i * tau each time
# f( x^{i + 1} ) > f^i_{ref} - delta^i
#
# - MaxFeval (integer scalar, optional, default value 300): the maximum
# number of function evaluations (hence, iterations, since there is
# exactly one function evaluation per iteration).
#
# - MInf (real scalar, optional, default value -Inf): if the algorithm
# determines a value for f() <= MInf this is taken as an indication that
# the problem is unbounded below and computation is stopped
# (a "finite -Inf").
#
# - mina (real scalar, optional, default value 1e-16): if the algorithm
# determines a stepsize value <= mina, this is taken as the fact that the
# algorithm has already obtained the most it can and computation is
# stopped. It is legal to take mina = 0.
#
# Output:
#
# - x ([ n x 1 ] real column vector): the best solution found so far.
#
# - status (string): a string describing the status of the algorithm at
# termination
#
# = 'optimal': the algorithm terminated having proven that x is a(n
# approximately) optimal solution; this only happens when "cheating",
# i.e., when v_* = f( nothing ) > -Inf is explicitly used, except in the
# unlikely case that f() spontaneously produces an almost-null
# subgradient
#
# = 'unbounded': the algorithm has determined an extremely large negative
# value for f() that is taken as an indication that the problem is
# unbounded below (a "finite -Inf", see MInf above)
#
# = 'stopped': the algorithm terminated having exhausted the maximum
# number of iterations: x is the best solution found so far, but not
# necessarily the optimal one
#
#{
# =======================================
# Author: Antonio Frangioni
# Date: 17-11-22
# Version 1.11
# Copyright Antonio Frangioni
# =======================================
#}
# Plotf values:
# 0 = nothing is plotted
# 1 = the level sets of f and the trajectory are plotted (when n = 2)
# 2 = the function value / gap are plotted
Interactive = false # if we pause at every iteration
# reading and checking input- - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
local gap
PXY = Matrix{Real}(undef, 2, 0)
status = "error"
if isnothing(x)
(fStar, x, _) = f(nothing)
else
(fStar, _, _) = f(nothing)
end
n = size(x, 1)
if eps < 0 && fStar == - Inf
# no way of cheating since the true optimal value is unknown
eps = - eps # revert to ordinary target level stepsize
end
if astart <= 0
error("astart must be > 0")
end
if tau <= 0 || tau >= 1
error("tau is not in (0, 1)")
end
if mina < 0
error("mina is < 0")
end
# initializations - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if printing
@printf("Subradient method\n")
if fStar > - Inf
@printf("iter\trel gap\t\tbest gap\t|| g(x) ||\ta\n\n")
else
@printf("iter\tf(x)\t\tf best\t\t|| g(x) ||\ta\n\n")
end
end
if Plotf == 2
gap = []
end
if Plotf > 1 && isnothing(plt)
plt = plot(xlims=(0, MaxFeval))
elseif isnothing(plt)
plt = plot()
end
# main loop - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
iter = 1
xref = x
fref = Inf # best f-value found so far
if eps > 0
delta = 0 # required displacement from fref
end
while true
# compute function and subgradient- - - - - - - - - - - - - - - - - - - - -
(v, g, _) = f(x)
ng = norm(g)
if eps > 0 # target-level stepsize
if v <= fref - delta # found a "significantly" better point
delta = astart * max(abs(v), 1) # reset delta
else # decrease delta
delta = max(delta * tau, eps * max(abs(min(v, fref)), 1))
end
end
if v < fref # found a better f-value (however slightly better)
fref = v # update fref
xref = x # this is the incumbent solution
end
# output statistics - - - - - - - - - - - - - - - - - - - - - - - - - -
if fStar > -Inf
gapk = (v - fStar)/max(abs(fStar), 1)
bstgapk = (fref - fStar)/max(abs(fStar), 1)
if printing
@printf("%4d\t%1.4e\t%1.4e\t%1.4e", iter, gapk, bstgapk, ng)
end
if Plotf == 2
push!(gap, gapk)
end
else
if printing
@printf("%4d\t%1.8e\t%1.8e\t\t%1.4e", iter, fref, v, ng)
end
if Plotf == 2
push!(gap, v)
end
end
# stopping criteria - - - - - - - - - - - - - - - - - - - - - - - - - -
if eps < 0 && fref - fStar <= -eps * max(abs(fStar), 1)
xref = x
status = "optimal"
if printing
@printf("\n")
end
break
end
if ng < 1e-12 # unlikely, but it could happen
xref = x
status = "optimal"
if printing
@printf("\n")
end
break
end
if iter > MaxFeval
status = "stopped"
if printing
@printf("\n")
end
break
end
# compute stepsize- - - - - - - - - - - - - - - - - - - - - - - - - - -
if eps > 0 # Polyak stepsize with target level
a = ( v - fref + delta ) / ( ng * ng )
elseif eps < 0 # true Polyak stepsize (cheating)
a = ( v - fStar ) / ( ng * ng )
else # diminishing square-summable stepsize
a = astart * ( 1 / iter )
end
# output statistics - - - - - - - - - - - - - - - - - - - - - - - - - -
if printing
@printf("\t%1.4e", a)
@printf("\n")
end
if a <= mina
status = "stopped"
if printing
@printf("\n")
end
break
end
if v <= MInf
status = "unbounded"
if printing
@printf("\n")
end
break
end
# compute new point - - - - - - - - - - - - - - - - - - - - - - - - - -
# possibly plot the trajectory
if n == 2 && Plotf == 1
PXY = hcat(PXY, hcat(x, x - a * g))
end
x = x - a * g
# iterate - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
iter += 1
if Interactive
readline()
end
end
# end of main loop- - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
x = xref # return point corresponding to best value found so far
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if plotatend
if Plotf == 2
plot!(plt, gap)
elseif Plotf == 1 && n == 2
plot!(plt, PXY[1, :], PXY[2, :])
end
display(plt)
end
return (x, status)
end # the end- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
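# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Usage sketch (an illustrative addition, not part of the original course
# code): a nondifferentiable f( x ) = || x ||_1, with sign.(x) as a valid
# subgradient, in the same three-output interface; sgm_testf and its
# starting point are assumed for the example. Since the true optimal value
# 0 is returned as the lower bound, eps < 0 lets SGM "cheat" with the exact
# Polyak stepsize.
function sgm_testf(x::Union{Nothing, Vector})
    if isnothing(x)
        return (0.0, [4.0, -3.0], nothing) # (f* = 0, default start, unused)
    end
    return (norm(x, 1), sign.(x), nothing) # (value, a subgradient, unused)
end
# (xopt, status) = SGM(sgm_testf, eps=-1e-6, plotatend=false)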

11-23/lesson.ipynb generated Normal file

File diff suppressed because one or more lines are too long