267 lines
8.5 KiB
Julia
267 lines
8.5 KiB
Julia
using LinearAlgebra, Printf, Plots
|
|
|
|
"""
    HBG(f; x=nothing, alpha=1, beta=0.9, eps=1e-6, MaxIter=300, MInf=-Inf,
        plt=nothing, plotatend=true, Plotf=0, printing=true) -> (x, status)

Minimize `f` with the Heavy Ball Gradient method: a fixed-stepsize gradient
method with a momentum term. Each step is

    d^i       = -alpha * g(x^i) + beta * d^{i-1}      (with d^0 = 0)
    x^{i + 1} = x^i + d^i

# The oracle `f`

`f` is called in two ways and must return a tuple (or any destructurable
collection) with at least two elements; elements after the second are ignored.

- `f(nothing)` returns `(fStar, x0, ...)`: `fStar` is the best known lower
  bound on the unconstrained optimum of `f` (`-Inf` if none is available) and
  `x0` is the default starting point.
- `f(x)` returns `(v, g, ...)`: the value `v = f(x)` and the gradient `g` of
  `f` at `x`.

# Keywords

- `x`: starting point; if `nothing`, the default starting point returned by
  `f(nothing)` is used. The vector passed in is not modified.
- `alpha` (default `1`): fixed stepsize along the anti-gradient; must be > 0.
- `beta` (default `0.9`): fixed weight of the momentum term; must be >= 0.
  With `beta = 0` the method is a plain fixed-stepsize gradient method.
- `eps` (default `1e-6`): stopping accuracy. If `eps >= 0` the method stops
  when `norm(g) <= eps`. If `eps < 0` the test is relative: the method stops
  when `norm(g) <= (-eps) * norm(g at the starting point)`.
- `MaxIter` (default `300`): maximum number of steps. The run stops as soon as
  the count of `f(x)` evaluations exceeds `MaxIter`, i.e. after at most
  `MaxIter + 1` evaluations (the call `f(nothing)` is not counted).
- `MInf` (default `-Inf`): if a value `f(x) <= MInf` is found, the problem is
  declared unbounded below (a "finite -Inf") and the run stops.
- `Plotf` (default `0`): what to plot.
    - `0`: nothing;
    - `1`: the trajectory of the iterates (only when `x` has 2 components);
    - `2` or more: the relative gap per evaluation if `fStar` is finite,
      the function value otherwise.
- `plt` (default `nothing`): an existing plot to draw on. If `nothing`, a new
  plot is created, but only when something is actually going to be plotted.
- `plotatend` (default `true`): if `true`, the recorded data is added to the
  plot and the plot is displayed when the run ends. If `false`, nothing is
  plotted at all.
- `printing` (default `true`): print a header and one log line per evaluation.

# Returns

A tuple `(x, status)`.

- `x`: the last iterate computed. Note that this is *not* necessarily the
  iterate with the smallest function value, since the method is not monotone.
- `status`: one of
    - `"optimal"`: the gradient norm at `x` satisfies the stopping test;
    - `"unbounded"`: a value `f(x) <= MInf` was found;
    - `"stopped"`: the evaluation budget `MaxIter` was exhausted;
    - `"error"`: the function value or the gradient norm became NaN, so the
      iteration cannot meaningfully continue.

# Throws

An `ErrorException` if `alpha <= 0` or `beta < 0`.

Original MATLAB version by Antonio Frangioni (version 1.01, 10-11-22).
"""
function HBG(f;
             x::Union{Nothing, Vector}=nothing,
             alpha::Real=1,
             beta::Real=0.9,
             eps::Real=1e-6,
             MaxIter::Integer=300,
             MInf::Real=-Inf,
             plt::Union{Plots.Plot, Nothing}=nothing,
             plotatend::Bool=true,
             Plotf::Integer=0,
             printing::Bool=true)::Tuple{AbstractArray, String}

    Interactive = false  # debugging aid: set to true to pause at every iteration

    # reading and checking input - - - - - - - - - - - - - - - - - - - - - - -

    # cheap parameter checks come first, before any oracle call is spent
    if alpha ≤ 0
        error("alpha must be positive")
    end

    if beta < 0
        error("beta must be non-negative")
    end

    # f(nothing) provides the lower bound and the default starting point;
    # only the first two returned elements are used
    fStar, xdefault = f(nothing)
    if isnothing(x)
        x = xdefault
    end

    n = size(x, 1)

    # plotting set-up - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # Data is recorded only if it is going to be drawn at the end. The same
    # flags drive both the recording and the final plot, so any Plotf >= 2
    # behaves like Plotf == 2.
    recordgap = plotatend && Plotf ≥ 2
    recordtraj = plotatend && Plotf == 1 && n == 2

    gap = Float64[]    # gap (or function value) at each evaluation
    trajx = Float64[]  # first coordinate of the segment end-points
    trajy = Float64[]  # second coordinate of the segment end-points

    # a fresh figure is created only when the caller did not supply one and
    # something may actually be plotted
    if isnothing(plt) && plotatend && Plotf != 0
        plt = recordgap ? plot(xlims=(0, MaxIter)) : plot()
    end

    # initializations - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    if printing
        @printf("Heavy Ball Gradient method\n")
        if fStar > -Inf
            @printf("feval\trel gap\t\tbest gap")
        else
            @printf("feval\tf(x)\tfbest")
        end
        @printf("\t|| g(x) ||\n\n")
    end

    v, g = f(x)
    ng = norm(g)
    vbest = v

    # absolute threshold on the gradient norm: a negative eps means
    # "relative to the norm of the first gradient"
    gradtol = eps < 0 ? -eps * ng : eps

    pastd = zeros(n)  # the direction at the previous iteration
    feval = 1         # f() evaluations count

    # returned only on NaN (or on an interactive "exit", see Interactive)
    status = "error"

    # main loop - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    while true
        # output statistics - - - - - - - - - - - - - - - - - - - - - - - - - -

        if fStar > -Inf
            scale = max(abs(fStar), 1)
            gapk = (v - fStar) / scale
            bstgapk = (vbest - fStar) / scale

            if printing
                @printf("%4d\t%1.4e\t%1.4e\t%1.4e\n", feval, gapk, bstgapk, ng)
            end

            if recordgap
                push!(gap, gapk)
            end
        else
            if printing
                @printf("%4d\t%1.8e\t%1.8e\t\t%1.4e\n", feval, v, vbest, ng)
            end

            if recordgap
                push!(gap, v)
            end
        end

        # stopping criteria - - - - - - - - - - - - - - - - - - - - - - - - - -

        if ng ≤ gradtol
            status = "optimal"
            break
        end

        # NaN never satisfies any of the comparisons in this section, so
        # without this check a broken run would go on until MaxIter and be
        # reported as "stopped"
        if isnan(v) || isnan(ng)
            status = "error"
            break
        end

        if feval > MaxIter
            status = "stopped"
            break
        end

        if v ≤ MInf
            status = "unbounded"
            break
        end

        # compute deflected gradient direction- - - - - - - - - - - - - - - - -

        d = -alpha * g .+ beta * pastd

        # compute new point - - - - - - - - - - - - - - - - - - - - - - - - - -

        # a new array is allocated, so the caller's starting point is untouched
        xnew = x + d

        # possibly record the segment [ x , x + d ] of the trajectory
        if recordtraj
            push!(trajx, x[1], xnew[1])
            push!(trajy, x[2], xnew[2])
        end

        x = xnew
        pastd .= d

        # compute f() - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

        v, g = f(x)
        ng = norm(g)
        if v < vbest
            vbest = v
        end
        feval += 1

        # iterate - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

        if Interactive
            l = readline()
            if l == "exit"
                break
            end
        end
    end

    # end of main loop- - - - - - - - - - - - - - - - - - - - - - - - - - - - -

    # plt is still nothing when no plot was requested and none was supplied;
    # in that case there is nothing to draw or display
    if plotatend && !isnothing(plt)
        if recordgap
            plot!(plt, gap)
        elseif recordtraj
            plot!(plt, trajx, trajy)
        end
        display(plt)
    end

    return (x, status)
end  # the end- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|