cmdla/project/L-BFGS/BFGS.jl

module BFGS
using LinearAlgebra: norm, I, dot, diagm, mul!
using ..OracleFunction
export BroydenFletcherGoldfarbShanno, BroydenFletcherGoldfarbShannoDogleg
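# Module-level switches: `armijiowolfeorexact` selects the line search
# (:armijiowolfe or :exact) and `BFGSorDFP` selects the quasi-Newton
# update formula (:BFGS or :DFP).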
const armijiowolfeorexact = :exact
const BFGSorDFP = :BFGS
function ArmijoWolfeLineSearch(
f::Union{LeastSquaresF, OracleF},
x::AbstractArray,
p::AbstractArray,
MaxEvaluations::Integer;
αinit::Real=1,
τ::Real=1.1,
c1::Real=1e-4,
c2::Real=0.9,
ϵα::Real=1e-16,
ϵgrad::Real=1e-12,
safeguard::Real=0.20,
)::Tuple{Real, Integer}
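# ϕ(α) = f(x + α p) is the objective restricted to the search direction p;
# the second return value is the directional derivative ϕ'(α) = pᵀ∇f(x + α p)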
ϕ = (α) -> begin
v = f.eval(x + α * p)
gradient = f.grad(x + α * p)
return (v, dot(p, gradient))
end
α = αinit
local αgrad
ϕ_0, ϕd_0 = ϕ(0)
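# phase 1: grow α by the factor τ until the strong Wolfe conditions hold,
# or until the directional derivative becomes nonnegative (a minimizer is bracketed)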
while MaxEvaluations > 0
αcurr, αgrad = ϕ(α)
MaxEvaluations -= 2
if (αcurr ≤ ϕ_0 + c1 * α * ϕd_0) && (abs(αgrad) ≤ -c2 * ϕd_0)
return (α, MaxEvaluations)
end
if αgrad ≥ 0
break
end
α *= τ
end
αlo = 0
αhi = α
αlograd = ϕd_0
αhigrad = αgrad
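# phase 2: safeguarded secant interpolation inside the bracket [αlo, αhi]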
while (MaxEvaluations > 0) && (αhi - αlo) > ϵα && (abs(αgrad) > ϵgrad)
α = (αlo * αhigrad - αhi * αlograd)/(αhigrad - αlograd)
α = max(
αlo + (αhi - αlo) * safeguard,
min(αhi - (αhi - αlo) * safeguard, α)
)
αcurr, αgrad = ϕ(α)
MaxEvaluations -= 2
if (αcurr ≤ ϕ_0 + c1 * α * ϕd_0) && (abs(αgrad) ≤ -c2 * ϕd_0)
break
end
if αgrad < 0
αlo = α
αlograd = αgrad
else
αhi = α
if αhi ≤ ϵα
break
end
αhigrad = αgrad
end
end
return (α, MaxEvaluations)
end
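# For least-squares objectives the optimal step length along p has a closed
# form; `tomography` (assumed to be provided by OracleFunction) computes it.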
function ExactLineSearch(
f::LeastSquaresF,
x::AbstractArray,
p::AbstractArray,
MaxEvaluations::Integer
)
MaxEvaluations -= 1
return (tomography(f, x, p), MaxEvaluations)
end
@doc raw"""
```julia
BroydenFletcherGoldfarbShanno(f::Union{LeastSquaresF, OracleF}; [x::AbstractVector{T}, ϵ::T=1e-6, MaxEvaluations::Integer=10000])
```
Computes the minimum of the input function `f`.
### Input
- `f` -- the input function to minimize.
- `x` -- the starting point; if not specified, the default one for the function `f` is used.
- `ϵ` -- the tolerance for the stopping criterion.
- `MaxEvaluations` -- maximum number of function evaluations. Both ```f.eval``` and ```f.grad``` are counted.
### Output
A named tuple containing:
- `x` -- the minimum found
- `eval` -- the value of the function at the minimum
- `grad` -- the gradient of the function at the minimum
- `RemainingEvaluations` -- the number of function evaluations not used.
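### Example
A minimal sketch; the `OracleF` constructor shown here is an assumption, but any
object exposing the `eval`, `grad` and `starting_point` fields used by this
module would work.
```julia
f = OracleF(x -> sum(abs2, x), x -> 2x, [10.0, -3.0]) # hypothetical constructor
result = BroydenFletcherGoldfarbShanno(f; ϵ=1e-8)
result.x # ≈ [0.0, 0.0]
```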
"""
function BroydenFletcherGoldfarbShanno(
f::Union{LeastSquaresF, OracleF};
x::Union{Nothing, AbstractVector{T}}=nothing,
ϵ::T=1e-6,
MaxEvaluations::Integer=10000
)::NamedTuple where {T}
if isnothing(x)
x = f.starting_point
end
gradient = f.grad(x)
MaxEvaluations -= 1
normgradient0 = norm(gradient)
H = diagm(ones(length(x)))
tmp1 = similar(H)
tmp2 = similar(H)
firstEvaluation = true
while MaxEvaluations > 0 && norm(gradient) > ϵ * normgradient0
p = -H * gradient # direction
α, MaxEvaluations =
if armijiowolfeorexact === :armijiowolfe || f isa OracleF
ArmijoWolfeLineSearch(f, x, p, MaxEvaluations)
elseif armijiowolfeorexact === :exact
ExactLineSearch(f, x, p, MaxEvaluations)
end
previousx = x
x = x + α * p
previousgradient = gradient
gradient = f.grad(x)
MaxEvaluations -= 1
s = x - previousx
y = gradient - previousgradient
ρ = inv(dot(y, s))
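# the update assumes the curvature condition yᵀs > 0, which the Wolfe
# conditions guarantee; otherwise ρ < 0 can destroy positive definiteness of H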
# on the first iteration, rescale H (still the identity) to approximate the inverse Hessian
if firstEvaluation
H .*= dot(y, s)/dot(y, y)
firstEvaluation = false
end
if BFGSorDFP == :DFP
# DFP update -------------------------------------------
# H = H - (H * y * y' * H)/(y' * H * y) + (s * s')/(y' * s)
mul!(tmp1, H * y * y', H)
mul!(tmp2, s, s')
H .+= -tmp1/dot(y, H, y) .+ ρ * tmp2
elseif BFGSorDFP == :BFGS
# BFGS update ------------------------------------------
# H = (I - ρ * s * y') * H * (I - ρ * y * s') + ρ * s * s'
mul!(tmp1, H * y, s')
mul!(tmp2, s, s')
H .+= ρ * ((1 + ρ * dot(y, H, y)) .* tmp2 .- tmp1 .- tmp1')
end
end
return (;
:x => x,
:eval => f.eval(x),
:grad => gradient,
:RemainingEvaluations => MaxEvaluations)
end
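@doc raw"""
```julia
BroydenFletcherGoldfarbShannoDogleg(f::Union{LeastSquaresF, OracleF}; [x::AbstractVector{T}, ϵ::T=1e-6, MaxEvaluations::Integer=10000])
```
Computes the minimum of the input function `f` by minimizing a quasi-Newton
quadratic model over a trust region with the dogleg method.
Inputs and outputs match [`BroydenFletcherGoldfarbShanno`](@ref).
"""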
function BroydenFletcherGoldfarbShannoDogleg(
f::Union{LeastSquaresF, OracleF};
x::Union{Nothing, AbstractVector{T}}=nothing,
ϵ::T=1e-6,
MaxEvaluations::Integer=10000
)::NamedTuple where {T}
if isnothing(x)
x = f.starting_point
end
Δ = 1 # initial size of trust region
smallestΔ = 1e-4 # smallest size where the linear approximation is applied
gradient = f.grad(x)
MaxEvaluations -= 1
normgradient0 = norm(gradient)
normgradient = normgradient0
H = diagm(ones(length(x)))
B = copy(H)
tmp1 = similar(H)
tmp2 = similar(H)
tmp3 = similar(H)
firstEvaluation = true
while MaxEvaluations > 0 && norm(gradient) > ϵ * normgradient0
# compute s by solving the subproblem min_s grad' * s + 0.5 s' * B * s with norm(s) ≤ Δ
CauchyPoint = -(Δ/normgradient) * gradient
τ = if dot(gradient, B, gradient) ≤ 0
1
else
min((normgradient^3)/(Δ * dot(gradient, B, gradient)), 1)
end
if Δ ≤ smallestΔ || B == I
# the Cauchy point is enough for small regions (linear approximation)
s = τ * CauchyPoint
else
pB = -H * gradient
pU = -dot(gradient, gradient)/dot(gradient, B, gradient) * gradient
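# pU is the unconstrained minimizer along steepest descent, pB the
# quasi-Newton step; the dogleg path interpolates between them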
if norm(pB) ≤ Δ
# the region is larger than the dogleg
s = pB
elseif Δ ≤ norm(pU)
# the region is smaller than the first step
s = Δ/norm(pU) * pU
else
# solve the quadratic equation ‖pU + (τ - 1)(pB - pU)‖² = Δ² for τ (dogleg step)
b = dot(pU, pB - pU)
a = dot(pB - pU, pB - pU)
c = dot(pU, pU)
τ = (a - b + sqrt(b^2 - c * a + a * Δ^2))/a
s = pU + (τ - 1) * (pB - pU)
end
end
previousx = x
x = x + s
previousgradient = gradient
gradient = f.grad(x)
normgradient = norm(gradient)
MaxEvaluations -= 1
y = gradient - previousgradient
ρ = inv(dot(y, s))
ared = f.eval(previousx) - f.eval(x) # actual reduction (x has already been advanced by s)
pred = -(dot(previousgradient, s) + 0.5 * dot(s, B, s)) # reduction predicted by the quadratic model at previousx
MaxEvaluations -= 2
# expand or contract the region
if (0.75 < ared/pred) && (0.8 * Δ < norm(s))
Δ = 2 * Δ
elseif (ared/pred < 0.1)
Δ = 0.5 * Δ
end
# on the first iteration, rescale H (still the identity) to approximate the inverse Hessian
if firstEvaluation
H .*= dot(y, s)/dot(y, y)
firstEvaluation = false
end
if BFGSorDFP == :DFP
# DFP update -------------------------------------------
# H = H - (H * y * y' * H)/(y' * H * y) + (s * s')/(y' * s)
mul!(tmp1, H * y * y', H)
mul!(tmp2, s, s')
H .+= -tmp1/dot(y, H, y) .+ ρ * tmp2
mul!(tmp1, y, s')
tmp2 = I - ρ * tmp1
mul!(tmp1, tmp2, B)
mul!(tmp3, tmp1, tmp2')
mul!(tmp2, y, y')
B .= tmp3 .+ ρ * tmp2
elseif BFGSorDFP == :BFGS
# BFGS update ------------------------------------------
# H = (I - ρ * s * y') * H * (I - ρ * y * s') + ρ * s * s'
mul!(tmp1, H * y, s')
mul!(tmp2, s, s')
H .+= ρ * ((1 + ρ * dot(y, H, y)) .* tmp2 .- tmp1 .- tmp1')
mul!(tmp1, B * s * s', B)
mul!(tmp2, y, y')
B .+= -tmp1/dot(s, B, s) .+ ρ * tmp2
end
end
return (;
:x => x,
:eval => f.eval(x),
:grad => gradient,
:RemainingEvaluations => MaxEvaluations)
end
end # module BFGS