module LBFGS

using LinearAlgebra: norm, I, dot
using DataStructures: CircularBuffer

using ..OracleFunction

export LimitedMemoryBFGS

# Selects the step-length procedure used by LimitedMemoryBFGS below:
# :armijiowolfe -> the inexact Armijo-Wolfe line search,
# :exact        -> the exact line search (least-squares objectives only).
const armijiowolfeorexact = :exact

function ArmijoWolfeLineSearch(
    f::Union{LeastSquaresF, OracleF},
    x::AbstractArray,
    p::AbstractArray,
    MaxEvaluations::Integer;
    αinit::Real=1,
    τ::Real=1.1,
    c1::Real=1e-4,
    c2::Real=0.9,
    ϵα::Real=1e-16,
    ϵgrad::Real=1e-12,
    safeguard::Real=0.20,
)::Tuple{Real, Integer}
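    # The search works on the one-dimensional restriction ϕ(α) = f(x + α p),
    # whose derivative is ϕ'(α) = pᵀ∇f(x + α p).  A step α is accepted when both
    #   Armijo (sufficient decrease):  ϕ(α) ≤ ϕ(0) + c1 α ϕ'(0)
    #   strong Wolfe (curvature):      |ϕ'(α)| ≤ -c2 ϕ'(0)
    # hold, as tested below.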

    ϕ = (α) -> begin
        v = f.eval(x + α * p)
        gradient = f.grad(x + α * p)
        return (v, dot(p, gradient))
    end

    α = αinit
    local αgrad

    # value and directional derivative at α = 0; the two oracle calls made by
    # ϕ(0) are charged to the evaluation budget
    ϕ_0, ϕd_0 = ϕ(0)
    MaxEvaluations -= 2

    # expansion phase: grow the step until the Wolfe conditions hold or the
    # directional derivative becomes non-negative (a bracket has been found)
    while MaxEvaluations > 0
        αcurr, αgrad = ϕ(α)
        MaxEvaluations -= 2

        if (αcurr ≤ ϕ_0 + c1 * α * ϕd_0) && (abs(αgrad) ≤ -c2 * ϕd_0)
            return (α, MaxEvaluations)
        end

        if αgrad ≥ 0
            break
        end
        α *= τ
    end
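
    # If the loop above exited with αgrad ≥ 0, the interval [0, α] brackets a
    # point where ϕ' vanishes (ϕ'(0) < 0 for a descent direction, ϕ'(α) ≥ 0).
    # The loop below shrinks the bracket by safeguarded secant interpolation,
    #   α = (αlo ϕ'(αhi) - αhi ϕ'(αlo)) / (ϕ'(αhi) - ϕ'(αlo)),
    # keeping the new point at least `safeguard` of the width away from the ends.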
    αlo = 0
    αhi = α
    αlograd = ϕd_0
    αhigrad = αgrad

    # iterate until the bracket or |ϕ'(α)| is small enough, or the budget runs out
    while (MaxEvaluations > 0) && (αhi - αlo) > ϵα && (abs(αgrad) > ϵgrad)
        α = (αlo * αhigrad - αhi * αlograd)/(αhigrad - αlograd)
        α = max(
            αlo + (αhi - αlo) * safeguard,
            min(αhi - (αhi - αlo) * safeguard, α)
        )

        αcurr, αgrad = ϕ(α)
        MaxEvaluations -= 2

        if (αcurr ≤ ϕ_0 + c1 * α * ϕd_0) && (abs(αgrad) ≤ -c2 * ϕd_0)
            break
        end

        if αgrad < 0
            αlo = α
            αlograd = αgrad
        else
            αhi = α
            if αhi ≤ ϵα
                break
            end
            αhigrad = αgrad
        end
    end

    return (α, MaxEvaluations)
end
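
# Exact line search for a least-squares objective: it delegates to `tomography`
# from ..OracleFunction, which is assumed here to return the exact minimizer of
# f along the direction p; the call is counted as a single evaluation.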
function ExactLineSearch(
    f::LeastSquaresF,
    x::AbstractArray,
    p::AbstractArray,
    MaxEvaluations::Integer
)
    MaxEvaluations -= 1
    return (tomography(f, x, p), MaxEvaluations)
end

@doc raw"""
```julia
LimitedMemoryBFGS(f::Union{LeastSquaresF{T}, OracleF{T, F, G}}; x::Union{Nothing, AbstractVector{T}}=nothing, ϵ::T=1e-6, m::Integer=3, MaxEvaluations::Integer=10000)
```

Computes the minimum of the input function `f`.

### Input

- `f` -- the input function to minimize.
- `x` -- the starting point; if not specified, the default starting point of the function `f` is used.
- `ϵ` -- the relative tolerance for the stopping criterion: iteration stops once the gradient norm falls below `ϵ` times its initial value.
- `m` -- maximum number of `(s, y)` vector pairs stored to build the approximate inverse Hessian.
- `MaxEvaluations` -- maximum number of function evaluations. Both ```f.eval``` and ```f.grad``` calls are counted.

### Output

A named tuple containing:
- `x` -- the minimum found
- `eval` -- the value of the function at the minimum
- `grad` -- the gradient of the function at the minimum
- `RemainingEvaluations` -- the number of function evaluations not used.

See also [`QRhous`](@ref).
"""
function LimitedMemoryBFGS(
    f::Union{LeastSquaresF, OracleF};
    x::Union{Nothing, AbstractVector{T}}=nothing,
    ϵ::T=1e-6,
    m::Integer=3,
    MaxEvaluations::Integer=10000
)::NamedTuple where {T}

    if isnothing(x)
        x = f.starting_point
    end

    gradient = f.grad(x)
    MaxEvaluations -= 1
    normgradient0 = norm(gradient)
    # the m most recent curvature triples (ρ, y, s); the oldest is discarded first
    H = CircularBuffer{NamedTuple}(m)
    αstore = Array{eltype(x)}(undef, 0)

    # stop when the gradient norm falls below ϵ times its initial value
    while MaxEvaluations > 0 && norm(gradient) > ϵ * normgradient0
        # two loop recursion for finding the direction
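        # first pass (newest to oldest):  αᵢ = ρᵢ sᵢᵀq,  q ← q - αᵢ yᵢ
        # second pass (oldest to newest): βᵢ = ρᵢ yᵢᵀr,  r ← r + sᵢ (αᵢ - βᵢ)
        # afterwards r ≈ H∇f, where H is the implicit inverse-Hessian
        # approximation, so the search direction is p = -r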
        q = gradient
        empty!(αstore)

        for i ∈ reverse(H)
            push!(αstore, i[:ρ] * dot(i[:s], q))
            q -= αstore[end] * i[:y]
        end
        # choose H0 as an initial scaling of the inverse Hessian approximation
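        # standard choice: H0 = (sₖᵀyₖ)/(yₖᵀyₖ) I, which scales the identity to
        # match the magnitude of the inverse Hessian along the latest pair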
        H0 = if isempty(H)
            I
        else
            (dot(H[end][:s], H[end][:y])/dot(H[end][:y], H[end][:y])) * I
        end
        r = H0 * q
        for i ∈ H
            βi = i[:ρ] * dot(i[:y], r)
            r += i[:s] * (pop!(αstore) - βi)
        end
        p = -r # direction

        # pick the step length; the exact search is only defined for
        # least-squares objectives, so oracle functions always use Armijo-Wolfe
        if armijiowolfeorexact === :armijiowolfe || f isa OracleF
            α, MaxEvaluations = ArmijoWolfeLineSearch(f, x, p, MaxEvaluations)
        elseif armijiowolfeorexact === :exact
            α, MaxEvaluations = ExactLineSearch(f, x, p, MaxEvaluations)
        end

        previousx = x
        x = x + α * p

        previousgradient = gradient
        gradient = f.grad(x)
        MaxEvaluations -= 1

        s = x - previousx
        y = gradient - previousgradient

        curvature = dot(s, y)
        ρ = inv(curvature)
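
        # the BFGS update requires the curvature condition sᵀy > 0 to keep the
        # inverse-Hessian approximation positive definite; when it fails, the
        # memory is cleared and the next direction falls back to -∇f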
        if curvature ≤ 1e-16
            empty!(H) # restart from the gradient
        else
            push!(H, (; :ρ => ρ, :y => y, :s => s))
        end
    end

    return (;
        :x => x,
        :eval => f.eval(x),
        :grad => gradient,
        :RemainingEvaluations => MaxEvaluations)
end
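
# Usage sketch (illustrative only): the actual constructors of LeastSquaresF and
# OracleF live in ..OracleFunction and may differ; the snippet below only assumes
# the fields `eval`, `grad` and `starting_point` used in this module.
#
#     f = OracleF(x -> sum(abs2, x), x -> 2x, ones(10))   # hypothetical constructor
#     result = LimitedMemoryBFGS(f; ϵ=1e-8, m=5)
#     result.x, result.eval, result.RemainingEvaluations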

end # module LBFGS