module BFGS

using LinearAlgebra: norm, I, dot, diagm, mul!

using ..OracleFunction

export BroydenFletcherGoldfarbShanno, BroydenFletcherGoldfarbShannoDogleg

# Line-search selection: `:exact` uses `ExactLineSearch` (only available for
# `LeastSquaresF`; an `OracleF` always falls back to the Armijo-Wolfe search),
# `:armijiowolfe` always uses `ArmijoWolfeLineSearch`.
const armijiowolfeorexact = :exact

# Quasi-Newton update formula used by both solvers: `:BFGS` or `:DFP`.
BFGSorDFP = :BFGS

function ArmijoWolfeLineSearch(
    f::Union{LeastSquaresF, OracleF},
    x::AbstractArray,
    p::AbstractArray,
    MaxEvaluations::Integer;
    αinit::Real=1,
    τ::Real=1.1,
    c1::Real=1e-4,
    c2::Real=0.9,
    ϵα::Real=1e-16,
    ϵgrad::Real=1e-12,
    safeguard::Real=0.20,
)::Tuple{Real, Integer}

    # ϕ(α) = f(x + α p) restricted to the search direction p;
    # returns the value and the directional derivative ϕ'(α) = p ⋅ ∇f(x + α p).
    ϕ = (α) -> begin
        v = f.eval(x + α * p)
        gradient = f.grad(x + α * p)
        return (v, dot(p, gradient))
    end

    α = αinit
    local αgrad

    ϕ_0, ϕd_0 = ϕ(0)

    # Expansion phase: grow α until the strong Wolfe conditions
    #   ϕ(α) ≤ ϕ(0) + c1 * α * ϕ'(0)   and   |ϕ'(α)| ≤ -c2 * ϕ'(0)
    # hold, or until ϕ'(α) becomes non-negative (a minimizer is bracketed).
    while MaxEvaluations > 0
        αcurr, αgrad = ϕ(α)
        MaxEvaluations -= 2

        if (αcurr ≤ ϕ_0 + c1 * α * ϕd_0) && (abs(αgrad) ≤ -c2 * ϕd_0)
            return (α, MaxEvaluations)
        end

        if αgrad ≥ 0
            break
        end
        α *= τ
    end

    # Interpolation (zoom) phase on the bracket [αlo, αhi].
    αlo = 0
    αhi = α
    αlograd = ϕd_0
    αhigrad = αgrad

    while (MaxEvaluations > 0) && (αhi - αlo) > ϵα && (αgrad > ϵgrad)
        # secant step on the derivative, safeguarded away from the bracket ends
        α = (αlo * αhigrad - αhi * αlograd)/(αhigrad - αlograd)
        α = max(
            αlo + (αhi - αlo) * safeguard,
            min(αhi - (αhi - αlo) * safeguard, α)
        )

        αcurr, αgrad = ϕ(α)
        MaxEvaluations -= 2

        if (αcurr ≤ ϕ_0 + c1 * α * ϕd_0) && (abs(αgrad) ≤ -c2 * ϕd_0)
            break
        end

        if αgrad < 0
            αlo = α
            αlograd = αgrad
        else
            αhi = α
            if αhi ≤ ϵα
                break
            end
            αhigrad = αgrad
        end
    end

    return (α, MaxEvaluations)
end

function ExactLineSearch(
    f::LeastSquaresF,
    x::AbstractArray,
    p::AbstractArray,
    MaxEvaluations::Integer
)
    MaxEvaluations -= 1
    # `tomography` (provided by OracleFunction) returns the exact step length along p.
    return (tomography(f, x, p), MaxEvaluations)
end
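
# For reference: for a least-squares objective of the form ‖A*x - b‖², the exact
# step along a direction p has the closed form (with r = A*x - b and q = A*p)
#
#     α = -dot(r, q) / dot(q, q)
#
# whatever the concrete implementation of `tomography` looks like.
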
@doc raw"""
|
|||
|
|
```julia
|
|||
|
|
BroydenFletcherGoldfarbShanno(f::Union{LeastSquaresF, OracleF}, [x::AbstractVector{T}, ϵ::T=1e-6, MaxEvaluations::Integer=10000])
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
Computes the minimum of the input function `f`.
|
|||
|
|
|
|||
|
|
### Input
|
|||
|
|
|
|||
|
|
- `f` -- the input function to minimize.
|
|||
|
|
- `x` -- the starting point, if not specified the default one for the function `f` is used.
|
|||
|
|
- `ϵ` -- the tollerance for the stopping criteria.
|
|||
|
|
- `m` -- maximum number of vector to store that compute the approximate hessian.
|
|||
|
|
- `MaxEvaluations` -- maximum number of function evaluations. Both ```f.eval``` and ```f.grad``` are counted.
|
|||
|
|
|
|||
|
|
### Output
|
|||
|
|
|
|||
|
|
A named tuple containing:
|
|||
|
|
- `x` -- the minimum found
|
|||
|
|
- `eval` -- the value of the function at the minimum
|
|||
|
|
- `grad` -- the gradient of the function at the minimum
|
|||
|
|
- `RemainingEvaluations` -- the number of function evaluation not used.
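
### Example

Illustrative call only; `prob` stands for any problem object built with the
`OracleFunction` module (it must provide `eval`, `grad` and `starting_point`):

```julia
result = BroydenFletcherGoldfarbShanno(prob; ϵ=1e-8, MaxEvaluations=5000)
result.x                      # minimizer found
result.RemainingEvaluations   # evaluation budget left over
```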
"""
function BroydenFletcherGoldfarbShanno(
    f::Union{LeastSquaresF, OracleF};
    x::Union{Nothing, AbstractVector{T}}=nothing,
    ϵ::T=1e-6,
    MaxEvaluations::Integer=10000
)::NamedTuple where {T}

    if isnothing(x)
        x = f.starting_point
    end

    gradient = f.grad(x)
    MaxEvaluations -= 1
    normgradient0 = norm(gradient)
    H = diagm(ones(length(x)))  # inverse Hessian approximation
    tmp1 = similar(H)
    tmp2 = similar(H)

    firstEvaluation = true

    while MaxEvaluations > 0 && norm(gradient) > ϵ * normgradient0
        p = -H * gradient  # search direction

        α, MaxEvaluations =
            if armijiowolfeorexact === :armijiowolfe || f isa OracleF
                ArmijoWolfeLineSearch(f, x, p, MaxEvaluations)
            elseif armijiowolfeorexact === :exact
                ExactLineSearch(f, x, p, MaxEvaluations)
            end

        previousx = x
        x = x + α * p

        previousgradient = gradient
        gradient = f.grad(x)
        MaxEvaluations -= 1

        s = x - previousx
        y = gradient - previousgradient
        ρ = inv(dot(y, s))

        # on the first iteration, rescale H to a rough approximation of the inverse Hessian
        if firstEvaluation
            copyto!(H, (dot(y, s)/dot(y, y)) * I)
            firstEvaluation = false
        end

        if BFGSorDFP == :DFP
            # DFP update -------------------------------------------
            # H = H - (H * y * y' * H)/(y' * H * y) + (s * s')/(y' * s)

            mul!(tmp1, H * y * y', H)
            mul!(tmp2, s, s')
            H .+= -tmp1/dot(y, H, y) .+ ρ * tmp2
        elseif BFGSorDFP == :BFGS
            # BFGS update ------------------------------------------
            # H = (I - ρ * s * y') * H * (I - ρ * y * s') + ρ * s * s'

            mul!(tmp1, H * y, s')
            mul!(tmp2, s, s')
            H .+= ρ * ((1 + ρ * dot(y, H, y)) .* tmp2 .- tmp1 .- tmp1')
        end
    end

    return (;
        :x => x,
        :eval => f.eval(x),
        :grad => gradient,
        :RemainingEvaluations => MaxEvaluations)
end

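@doc raw"""
```julia
BroydenFletcherGoldfarbShannoDogleg(f::Union{LeastSquaresF, OracleF}; x::AbstractVector=f.starting_point, ϵ::Real=1e-6, MaxEvaluations::Integer=10000)
```

Trust-region variant of `BroydenFletcherGoldfarbShanno`: instead of a line search,
each step is computed with the dogleg method inside a trust region of radius `Δ`,
while the Hessian approximations are still updated with the BFGS (or DFP) formulas.
Inputs and output are the same as for `BroydenFletcherGoldfarbShanno`.
"""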
function BroydenFletcherGoldfarbShannoDogleg(
    f::Union{LeastSquaresF, OracleF};
    x::Union{Nothing, AbstractVector{T}}=nothing,
    ϵ::T=1e-6,
    MaxEvaluations::Integer=10000
)::NamedTuple where {T}

    if isnothing(x)
        x = f.starting_point
    end

    Δ = 1            # initial size of the trust region
    smallestΔ = 1e-4 # smallest size at which the linear approximation is applied

    gradient = f.grad(x)
    MaxEvaluations -= 1
    normgradient0 = norm(gradient)
    normgradient = normgradient0
    H = diagm(ones(length(x)))  # inverse Hessian approximation (quasi-Newton step)
    B = copy(H)                 # direct Hessian approximation (trust-region model)
    tmp1 = similar(H)
    tmp2 = similar(H)
    tmp3 = similar(H)

    firstEvaluation = true

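    # Trust-region step: each iteration approximately solves the subproblem
    #
    #     min_s  gradient' * s + 0.5 * s' * B * s    subject to  norm(s) ≤ Δ
    #
    # along the dogleg path that joins the steepest-descent (Cauchy) segment,
    # ending at pU = -(g'g / g'Bg) * g, with the quasi-Newton point pB = -H * g.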
    while MaxEvaluations > 0 && norm(gradient) > ϵ * normgradient0
        # compute s by solving the subproblem min_s gradient' * s + 0.5 * s' * B * s with norm(s) ≤ Δ
        CauchyPoint = -(Δ/normgradient) * gradient
        τ = if dot(gradient, B, gradient) ≤ 0
            1
        else
            min((normgradient^3)/(Δ * dot(gradient, B, gradient)), 1)
        end

        if Δ ≤ smallestΔ || B == I
            # the Cauchy point is enough for small regions (linear approximation)
            s = τ * CauchyPoint
        else
            pB = -H * gradient
            pU = -dot(gradient, gradient)/dot(gradient, B, gradient) * gradient

            if norm(pB) ≤ Δ
                # the region is larger than the full dogleg
                s = pB
            elseif Δ ≤ norm(pU)
                # the region is smaller than the first (steepest-descent) segment
                s = Δ/norm(pU) * pU
            else
                # solve ‖pU + (τ - 1) * (pB - pU)‖² = Δ² for τ ∈ [1, 2],
                # i.e. the quadratic b*t² + 2*a*t + (c - Δ²) = 0 with t = τ - 1
                a = dot(pU, pB - pU)
                b = dot(pB - pU, pB - pU)
                c = dot(pU, pU)

                τ = (-a + b + sqrt(a^2 - c * b + b * Δ^2))/b
                s = pU + (τ - 1) * (pB - pU)
            end
        end

        previousx = x
        x = x + s

        previousgradient = gradient
        gradient = f.grad(x)
        normgradient = norm(gradient)
        MaxEvaluations -= 1

        y = gradient - previousgradient
        ρ = inv(dot(y, s))

        ared = f.eval(previousx) - f.eval(x)                    # actual reduction
        pred = -(dot(previousgradient, s) + 0.5 * dot(s, B, s)) # reduction predicted by the model
        MaxEvaluations -= 2

        # expand or contract the region
        if (0.75 < ared/pred) && (0.8 * Δ < norm(s))
            Δ = 2 * Δ
        elseif (ared/pred < 0.1)
            Δ = 0.5 * Δ
        end

        # on the first iteration, rescale H to a rough approximation of the inverse Hessian
        if firstEvaluation
            copyto!(H, (dot(y, s)/dot(y, y)) * I)
            firstEvaluation = false
        end

        if BFGSorDFP == :DFP
            # DFP update -------------------------------------------
            # H = H - (H * y * y' * H)/(y' * H * y) + (s * s')/(y' * s)

            mul!(tmp1, H * y * y', H)
            mul!(tmp2, s, s')
            H .+= -tmp1/dot(y, H, y) .+ ρ * tmp2

            # B = (I - ρ * y * s') * B * (I - ρ * y * s')' + ρ * y * y'
            mul!(tmp1, y, s')
            tmp2 = I - ρ * tmp1
            mul!(tmp1, tmp2, B)
            mul!(tmp3, tmp1, tmp2')
            mul!(tmp2, y, y')
            B .= tmp3 .+ ρ * tmp2
        elseif BFGSorDFP == :BFGS
            # BFGS update ------------------------------------------
            # H = (I - ρ * s * y') * H * (I - ρ * y * s') + ρ * s * s'

            mul!(tmp1, H * y, s')
            mul!(tmp2, s, s')
            H .+= ρ * ((1 + ρ * dot(y, H, y)) .* tmp2 .- tmp1 .- tmp1')

            # B = B - (B * s * s' * B)/(s' * B * s) + ρ * y * y'
            mul!(tmp1, B * s * s', B)
            mul!(tmp2, y, y')
            B .+= -tmp1/dot(s, B, s) .+ ρ * tmp2
        end
    end

    return (;
        :x => x,
        :eval => f.eval(x),
        :grad => gradient,
        :RemainingEvaluations => MaxEvaluations)
end

end # module BFGS